Replies: 1 comment
-
If by backend you mean a hardware accelerator or GPU, I unfortunately did not find any documentation on it, but I did manage to get a backend registered with its operations. Backends are contained within the `ggml/src` directory, one subdirectory per backend (e.g. `ggml-cuda`, `ggml-metal`).
Inform GGML that you have a backend, and provide the necessary interface for GGML to interact with it.

// Registry entry point: GGML calls this once to discover the backend.
// The registration record has static storage duration because GGML keeps
// the returned pointer for the lifetime of the process.
ggml_backend_reg_t ggml_backend_YOUR_BACKEND_NAME_reg(void) {
    // Named `reg` (not the function's own name) to avoid shadowing the function.
    static struct ggml_backend_reg reg = {
        /* .api_version = */ GGML_YOUR_BACKEND_NAME_BACKEND_VERSION,
        /* .interface   = */ ggml_backend_YOUR_BACKEND_NAME_reg_i,
        /* .context     = */ NULL,
    };
    // Fixed: previously returned &ggml_backend_zdnn_reg — a copy-paste
    // leftover from the zDNN backend that would not even compile here.
    return &reg;
}
// Fixed: the macro argument must be this backend's entry point, not zdnn's.
GGML_BACKEND_DL_IMPL(ggml_backend_YOUR_BACKEND_NAME_reg)
// Registry-level vtable: GGML uses these callbacks to query the backend's
// name, enumerate its devices, and resolve optional extension entry points.
// All four members are required; each must point at a function you define.
static const struct ggml_backend_reg_i ggml_backend_YOUR_BACKEND_NAME_reg_i = {
/* .get_name = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_name,
/* .get_device_count = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_device_count,
/* .get_device = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_device,
/* .get_proc_address = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_proc_address,
};
// Return the device at `index`. This backend exposes exactly one device,
// so any index other than 0 is a programming error.
static ggml_backend_dev_t ggml_backend_YOUR_BACKEND_NAME_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    // A single process-lifetime device record, shared by every caller.
    static ggml_backend_device device = {
        /* .interface = */ ggml_backend_YOUR_BACKEND_NAME_device_i,
        /* .register  = */ reg,
        /* .context   = */ nullptr,
    };

    return &device;
}
// Optional extension lookup. This backend exposes no extra entry points,
// so every query resolves to nullptr.
static void * ggml_backend_YOUR_BACKEND_NAME_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    GGML_UNUSED(reg);
    GGML_UNUSED(name);

    return nullptr;
}
// Device-level vtable: GGML queries the device through these callbacks.
// Members set to NULL are optional features this backend does not provide
// (host buffers, op offloading hints, and cross-backend events).
static const struct ggml_backend_device_i ggml_backend_YOUR_BACKEND_NAME_device_i = {
/* .get_name = */ ggml_backend_YOUR_BACKEND_NAME_device_get_name,
/* .get_description = */ ggml_backend_YOUR_BACKEND_NAME_device_get_desc,
/* .get_memory = */ ggml_backend_YOUR_BACKEND_NAME_device_get_memory,
/* .get_type = */ ggml_backend_YOUR_BACKEND_NAME_device_get_type,
/* .get_props = */ ggml_backend_YOUR_BACKEND_NAME_device_get_props,
/* .init_backend = */ ggml_backend_YOUR_BACKEND_NAME_device_init_backend,
/* .get_buffer_type = */ ggml_backend_YOUR_BACKEND_NAME_device_get_buffer_type,
/* .get_host_buffer_type = */ NULL,
/* .buffer_from_host_ptr = */ ggml_backend_YOUR_BACKEND_NAME_device_buffer_from_host_ptr,
/* .supports_op = */ ggml_backend_YOUR_BACKEND_NAME_device_supports_op,
/* .supports_buft = */ ggml_backend_YOUR_BACKEND_NAME_device_supports_buft,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_synchronize = */ NULL,
};
// Report whether this device can execute `op`. GGML calls this while
// partitioning the graph; returning false routes the op to another backend.
static bool ggml_backend_YOUR_BACKEND_NAME_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
    // NOTE(review): the original declared src0/src1 here but never used them;
    // removed to avoid unused-variable warnings. Reintroduce them when you
    // start checking operand types/shapes.
    switch (op->op) {
        // Ops GGML requires every backend to accept (no-op / view reshuffles).
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            break;
        case GGML_OP_ADD:
        case GGML_OP_ADD1:
        case GGML_OP_SUB:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SQRT:
        case GGML_OP_LOG:
        case GGML_OP_NORM:
        case GGML_OP_MUL_MAT:
        case GGML_OP_MUL_MAT_ID:
        case GGML_OP_SOFT_MAX:
        case GGML_OP_LEAKY_RELU:
            return false; // TODO: disable all support first to showcase device reg
        case GGML_OP_UNARY:
            switch (ggml_get_unary_op(op)) {
                case GGML_UNARY_OP_ABS:
                case GGML_UNARY_OP_SGN:
                case GGML_UNARY_OP_NEG:
                case GGML_UNARY_OP_STEP:
                case GGML_UNARY_OP_TANH:
                case GGML_UNARY_OP_ELU:
                case GGML_UNARY_OP_RELU:
                case GGML_UNARY_OP_SIGMOID:
                case GGML_UNARY_OP_GELU:
                case GGML_UNARY_OP_GELU_QUICK:
                case GGML_UNARY_OP_SILU:
                case GGML_UNARY_OP_HARDSWISH:
                case GGML_UNARY_OP_HARDSIGMOID:
                case GGML_UNARY_OP_EXP:
                    break;
                default:
                    return false;
            }
            // BUG FIX: this break was missing, so control fell through into
            // `default: return false;` and the accepted unary ops above could
            // never reach `return true`.
            break;
        default:
            return false;
    }
    return true;

    GGML_UNUSED(dev);
}
// Dispatch one graph node to this backend's kernels. Returns true when the
// op was handled here, false to tell GGML the op is unsupported.
// NOTE(review): `ctx` is unused until real kernels are wired in.
inline bool ggml_YOUR_BACKEND_NAME_compute_forward(ggml_backend_YOUR_BACKEND_NAME_context & ctx,
                                                   ggml_tensor * dst) {
    GGML_UNUSED(ctx);

    switch (dst->op) {
        case GGML_OP_ADD:
        case GGML_OP_ADD1:
        case GGML_OP_SUB:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SQRT:
        case GGML_OP_LOG:
        case GGML_OP_NORM:
        case GGML_OP_MUL_MAT:
        case GGML_OP_MUL_MAT_ID:
        case GGML_OP_SOFT_MAX:
        case GGML_OP_LEAKY_RELU:
            return false;
        case GGML_OP_UNARY:
            switch (ggml_get_unary_op(dst)) {
                case GGML_UNARY_OP_ABS:
                case GGML_UNARY_OP_SGN:
                case GGML_UNARY_OP_NEG:
                case GGML_UNARY_OP_STEP:
                case GGML_UNARY_OP_TANH:
                case GGML_UNARY_OP_ELU:
                case GGML_UNARY_OP_RELU:
                case GGML_UNARY_OP_SIGMOID:
                case GGML_UNARY_OP_GELU:
                case GGML_UNARY_OP_GELU_QUICK:
                case GGML_UNARY_OP_SILU:
                case GGML_UNARY_OP_HARDSWISH:
                case GGML_UNARY_OP_HARDSIGMOID:
                    return false;
                case GGML_UNARY_OP_EXP:
                    break;
                default:
                    return false;
            }
            // BUG FIX: this break was missing — after GGML_UNARY_OP_EXP broke
            // out of the inner switch, control fell into `default: return
            // false;`, so EXP (the one op this structure singles out as
            // handled) was still rejected.
            break;
        default:
            return false;
    }
    return true;
}

That should be the main bulk of registering a device and getting the compute operation forwarded to your backend. Please take note that GGML's matrix multiplication is computed with the first operand transposed — `ggml_mul_mat(ctx, A, B)` effectively yields `B · Aᵀ` (verify against the ggml docs for your version). You may choose to refer to my zDNN implementation here: https://github.com/taronaeo/llama.cpp-s390x/blob/zdnn-accelerator-backend/ggml/src/ggml-zdnn/ggml-zdnn.cpp
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
How do I add a new backend for llama.cpp? Is there any documentation?
Beta Was this translation helpful? Give feedback.
All reactions