Skip to content

Commit 5007489

Browse files
committed
add comments
1 parent 927987b commit 5007489

File tree

2 files changed

+89
-26
lines changed

2 files changed

+89
-26
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,30 +1329,74 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
13291329
GGML_CANN_CALL_ACLNN_OP(ctx, InplacePowTensorTensor, acl_dst, acl_exp);
13301330
}
13311331

1332-
1332+
/**
1333+
* @brief Generate a range of values and apply a scalar base exponentiation.
1334+
*
1335+
* This function creates an evenly spaced sequence from `start` to `stop` (exclusive),
1336+
* with step size `step`, stores it in a temporary buffer, and then computes:
1337+
*
1338+
* @f[
1339+
* slope[i] = m^{\left( start + i \cdot step \right)}, \quad 0 \le i < size
1340+
* @f]
1341+
*
1342+
* The results are written to the provided @p slope_buffer.
1343+
*
1344+
* @param ctx CANN backend context for memory allocation and operator execution.
1345+
* @param slope_buffer Pointer to the output buffer (float array) for the computed slope values.
1346+
* @param m Scalar base for the exponentiation.
1347+
* @param size Number of elements in the generated sequence.
1348+
* @param start Starting exponent offset.
1349+
* @param stop Stopping exponent offset (exclusive).
1350+
* @param step Step size for the exponent increment.
1351+
*/
13331352
static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_buffer,
13341353
float m, int64_t size, float start, float stop, float step){
13351354
int64_t ne[] = {size};
13361355
size_t nb[] = {sizeof(float)};
13371356

13381357
ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(float));
1339-
void * arange_buffer = arange_allocator.get();
1358+
void* arange_buffer = arange_allocator.get();
13401359

1341-
aclTensor * arange_tensor = ggml_cann_create_tensor(
1360+
aclTensor* arange_tensor = ggml_cann_create_tensor(
13421361
arange_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);
13431362
aclnn_arange(ctx, arange_tensor, start, stop, step, size);
13441363

1345-
aclTensor * slope_tensor = ggml_cann_create_tensor(
1364+
aclTensor* slope_tensor = ggml_cann_create_tensor(
13461365
slope_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);
13471366

1348-
aclScalar * sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
1367+
aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
13491368

13501369
GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, sc, arange_tensor, slope_tensor);
13511370
ggml_cann_release_resources(ctx, sc, arange_tensor, slope_tensor);
13521371
}
13531372

1373+
/**
1374+
* @brief Compute slope values for multiple attention heads based on ALiBi bias parameters.
1375+
*
1376+
* This function generates slope values for each attention head according to the ALiBi
1377+
* (Attention with Linear Biases) method. It splits the computation into two ranges depending
1378+
* on whether the head index is less than @p n_head_log2 or not, and uses different base values
1379+
* (`m0` and `m1`) for the exponentiation.
1380+
*
1381+
* @f[
1382+
* slope[h] =
1383+
* \begin{cases}
1384+
* m_0^{(h + 1)}, & h < n\_head\_log2 \\
1385+
* m_1^{\left( 2 \cdot (h - n\_head\_log2) + 1 \right)}, & h \geq n\_head\_log2
1386+
* \end{cases}
1387+
* \quad , \quad \text{if } max\_bias > 0
1388+
* @f]
1389+
*
1390+
* If @p max_bias <= 0, all slope values are set to 1.0.
1391+
*
1392+
* @param ctx CANN backend context for memory allocation and operator execution.
1393+
* @param n_head Total number of attention heads.
1394+
* @param slope_buffer Pointer to the output buffer (float array) for storing slopes.
1395+
* @param max_bias Maximum bias value for slope computation.
1396+
*
1397+
*/
13541398
static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
1355-
void * slope_buffer, float max_bias) {
1399+
void* slope_buffer, float max_bias) {
13561400
const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));
13571401

13581402
float m0 = powf(2.0f, -(max_bias) / n_head_log2);
@@ -1382,24 +1426,43 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
13821426
}
13831427
}
13841428

1429+
/**
1430+
* @brief Add ALiBi (Attention with Linear Biases) positional biases to the attention mask.
1431+
*
1432+
* This function computes the ALiBi slopes for each attention head (if max_bias > 0),
1433+
* multiplies them with the attention mask to produce bias tensors, and adds these biases
1434+
* to the destination tensor (@p dst).
1435+
*
1436+
* The function performs necessary broadcasting of the mask and slope tensors to match
1437+
* the shape of the destination tensor, then applies element-wise multiplication and addition
1438+
* using CANN operators.
1439+
*
1440+
* @param ctx CANN backend context for memory management and operator execution.
1441+
* @param mask Input attention mask tensor, assumed to be contiguous.
1442+
* @param dst Destination tensor to which ALiBi biases will be added.
1443+
* @param dst_ptr Pointer to the memory of the destination tensor.
1444+
* @param max_bias Maximum bias value controlling the slope scaling.
1445+
*
1446+
* @note
1447+
* - Write data into dst_ptr using only the shape information of the dst tensor.
1448+
* - `GGML_MAX_DIMS + 2` is used to extend tensor dimensions for broadcasting.
1449+
*/
13851450
static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
13861451
ggml_tensor* dst, void* dst_ptr, float max_bias) {
13871452
void* slope_buffer = nullptr;
13881453
void* bias_buffer = nullptr;
13891454

1390-
int64_t n_heads = dst->ne[2];
1391-
ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(float));
1392-
slope_buffer = slope_allocator.get();
1393-
ggml_cann_pool_alloc bias_allocator(
1394-
ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
1395-
bias_buffer = bias_allocator.get();
1396-
13971455
if (max_bias > 0.0f) {
1456+
int64_t n_heads = dst->ne[2];
1457+
ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(float));
1458+
slope_buffer = slope_allocator.get();
1459+
ggml_cann_pool_alloc bias_allocator(
1460+
ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
1461+
bias_buffer = bias_allocator.get();
13981462
aclnn_get_slope(ctx, n_heads, slope_buffer, max_bias);
13991463
}
14001464

14011465
    // broadcast for mask, slope and dst;
1402-
GGML_ASSERT(ggml_is_contiguous(mask));
14031466
int64_t nr2 = dst->ne[2] / mask->ne[2];
14041467
int64_t nr3 = dst->ne[3] / mask->ne[3];
14051468

@@ -1424,12 +1487,14 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
14241487
slope_nb[i] = slope_nb[i - 1] * slope_ne[i - 1];
14251488
}
14261489

1427-
aclTensor * acl_slope = ggml_cann_create_tensor(
1490+
aclTensor* acl_slope = ggml_cann_create_tensor(
14281491
slope_buffer, ACL_FLOAT, sizeof(float),
14291492
slope_ne, slope_nb, GGML_MAX_DIMS + 2);
1430-
aclTensor * acl_mask = ggml_cann_create_tensor(
1493+
aclTensor* acl_mask = ggml_cann_create_tensor(
14311494
mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
1432-
aclTensor * acl_dst = ggml_cann_create_tensor(
1495+
1496+
// write data into dst_ptr using only the shape information of the dst tensor.
1497+
aclTensor* acl_dst = ggml_cann_create_tensor(
14331498
dst_ptr, ggml_cann_type_mapping(dst->type),
14341499
ggml_type_size(dst->type), dst_ne, dst_nb,
14351500
GGML_MAX_DIMS + 2);
@@ -1441,7 +1506,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
14411506
for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
14421507
bias_nb[i] = bias_nb[i - 1] * bias_ne[i - 1];
14431508
}
1444-
aclTensor * bias_tensor = ggml_cann_create_tensor(
1509+
aclTensor* bias_tensor = ggml_cann_create_tensor(
14451510
bias_buffer, ACL_FLOAT, sizeof(float),
14461511
bias_ne, bias_nb, GGML_MAX_DIMS + 2);
14471512

@@ -1473,16 +1538,16 @@ void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
14731538
* stored.
14741539
*/
14751540
/**
 * @brief Thin wrapper that applies the CANN Softmax operator along one dimension.
 *
 * @param ctx     CANN backend context used to launch the operator.
 * @param acl_src Input tensor.
 * @param dim     Dimension along which the softmax is computed.
 * @param acl_dst Output tensor where the softmax result is stored.
 */
static void aclnn_softmax(ggml_backend_cann_context & ctx,
                          aclTensor* acl_src, int64_t dim, aclTensor* acl_dst) {
    GGML_CANN_CALL_ACLNN_OP(ctx, Softmax, acl_src, dim, acl_dst);
}
14791544

14801545
void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
1481-
ggml_tensor * src0 = dst->src[0];
1482-
ggml_tensor * src1 = dst->src[1]; // mask
1546+
ggml_tensor* src0 = dst->src[0];
1547+
ggml_tensor* src1 = dst->src[1]; // mask
14831548

1484-
aclTensor * acl_src0 = ggml_cann_create_tensor(src0);
1485-
aclTensor * acl_dst = ggml_cann_create_tensor(dst);
1549+
aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
1550+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
14861551

14871552
float scale = 1.0f;
14881553
float max_bias = 0.0f;
@@ -1491,7 +1556,7 @@ void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
14911556
memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
14921557

14931558
// input mul scale
1494-
aclScalar * acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
1559+
aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
14951560
ggml_cann_pool_alloc src_tensor_allocator(ctx.pool(), ggml_nbytes(src0));
14961561
void* src_tensor_buffer = src_tensor_allocator.get();
14971562
aclTensor* softmax_tensor = ggml_cann_create_tensor(

tests/test-backend-ops.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5877,8 +5877,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
58775877
exponent <<= 1;
58785878
}
58795879
#endif
5880-
// SOFT_MAX(type=f32,ne=[16,16,1,3],mask=1,sinks=0,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000)
5881-
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 16, 1, 3}, 1, 0, GGML_TYPE_F32, {3, 1}, 1, 8));
58825880
for (bool mask : {false, true}) {
58835881
for (bool sinks : {false, true}) {
58845882
for (float max_bias : {0.0f, 8.0f}) {

0 commit comments

Comments
 (0)