@@ -1330,60 +1330,68 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
 }
 
-static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void * slope_buffer, float m, int64_t size, float start, float stop, float step){
+static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void * slope_buffer,
+    float m, int64_t size, float start, float stop, float step){
     int64_t ne[] = {size};
     size_t nb[] = {sizeof(float)};
 
-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),size * sizeof(float));
-    void * arange_buffer = arange_allocator.get();
-
-    aclTensor* arange_tensor = ggml_cann_create_tensor(
-        arange_buffer, ACL_FLOAT,
-        sizeof(float), ne, nb, 1);
+    ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(float));
+    void * arange_buffer = arange_allocator.get();
+
+    aclTensor * arange_tensor = ggml_cann_create_tensor(
+        arange_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);
     aclnn_arange(ctx, arange_tensor, start, stop, step, size);
 
-    aclTensor* slope_tensor = ggml_cann_create_tensor(
-        slope_buffer, ACL_FLOAT,
-        sizeof(float), ne, nb, 1);
+    aclTensor * slope_tensor = ggml_cann_create_tensor(
+        slope_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);
 
-    aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
+    aclScalar * sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
 
     GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, sc, arange_tensor, slope_tensor);
     ggml_cann_release_resources(ctx, sc, arange_tensor, slope_tensor);
 }
 
-static void aclnn_get_slope(ggml_backend_cann_context& ctx, int64_t n_head, void * slope_buffer, float max_bias) {
-    const int n_head_log2 = 1u << (uint32_t)floor(log2(n_head));
+static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
+    void * slope_buffer, float max_bias) {
+    const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));
 
     float m0 = powf(2.0f, -(max_bias) / n_head_log2);
     float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
 
-    // const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f;
+    // const float slope = (max_bias > 0.0f) ?
+    //                          h < n_head_log2 ?
+    //                              powf(m0, h + 1) :
+    //                              powf(m1, 2*(h - n_head_log2) + 1) :
+    //                          1.0f;
     // arange1
     float start = 0 + 1;
-    float end   = (n_head_log2 - 1) + 1;
-    float step  = 1;
+    float end = (n_head_log2 - 1) + 1;
+    float step = 1;
     float count = n_head_log2;
     // end needs to be +1 because aclnn uses a left-closed, right-open interval.
     aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step);
     if (n_head_log2 < n_head) {
         // arange2
-        start = 2 * (n_head_log2 - n_head_log2) + 1;
-        end   = 2 * ((n_head - 1) - n_head_log2) + 1;
-        step  = 2;
+        start = 2 * (n_head_log2 - n_head_log2) + 1;
+        end = 2 * ((n_head - 1) - n_head_log2) + 1;
+        step = 2;
         count = n_head - n_head_log2;
-        aclnn_get_slope_inner(ctx, (char *)slope_buffer + n_head_log2*sizeof(float), m1, count, start, end + 1, step);
+        aclnn_get_slope_inner(
+            ctx, (char *) slope_buffer + n_head_log2 * sizeof(float),
+            m1, count, start, end + 1, step);
     }
 }
 
-static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, ggml_tensor* dst, void * dst_ptr, float max_bias) {
+static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
+    ggml_tensor* dst, void * dst_ptr, float max_bias) {
     void * slope_buffer = nullptr;
     void * bias_buffer = nullptr;
 
     int64_t n_heads = dst->ne[2];
     ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(float));
     slope_buffer = slope_allocator.get();
-    ggml_cann_pool_alloc bias_allocator(ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
+    ggml_cann_pool_alloc bias_allocator(
+        ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
     bias_buffer = bias_allocator.get();
 
     if (max_bias > 0.0f) {
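
For reference, the two `aclnn_get_slope_inner` calls above materialize the standard ALiBi per-head slopes on device: the first arange covers heads below `n_head_log2` with successive powers of `m0`, the second covers the remaining heads with odd powers of `m1`, exactly as in the commented formula. A host-side sketch of the same values (hypothetical helper, not part of this file):

```cpp
// Hypothetical CPU reference for the slopes that aclnn_get_slope fills in,
// following the formula quoted in the comment above.
#include <cmath>
#include <cstdint>
#include <vector>

static std::vector<float> ref_alibi_slopes(int64_t n_head, float max_bias) {
    const int   n_head_log2 = 1u << (uint32_t) floor(log2(n_head));
    const float m0 = powf(2.0f, -(max_bias) / n_head_log2);
    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);

    std::vector<float> slopes(n_head);
    for (int64_t h = 0; h < n_head; h++) {
        slopes[h] = (max_bias > 0.0f)
            ? (h < n_head_log2 ? powf(m0, h + 1)                       // first arange: m0^1 .. m0^n_head_log2
                               : powf(m1, 2 * (h - n_head_log2) + 1))  // second arange: m1^1, m1^3, m1^5, ...
            : 1.0f;
    }
    return slopes;
}
```

Expressing the slopes as `pow(scalar, arange)` lets the backend fill the whole slope buffer with two small kernel launches instead of looping over heads on the host.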
@@ -1396,44 +1404,46 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, g
     int64_t nr3 = dst->ne[3] / mask->ne[3];
 
     // broadcast the mask across rows
-    int64_t mask_ne[] = {mask->ne[0], dst->ne[1], mask->ne[2], 1, mask->ne[3], 1};
-    size_t mask_nb[GGML_MAX_DIMS + 2];
-    mask_nb[0] = mask->nb[0];
-    mask_nb[1] = mask->nb[1];
-    mask_nb[2] = mask->nb[2];
-    mask_nb[3] = mask->nb[2];
-    mask_nb[4] = mask->nb[3];
-    mask_nb[5] = mask->nb[3];
-
-    // ne2 and ne3 may be integer multiples of the mask.
-    int64_t dst_ne[] = {dst->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], nr3};
-    size_t dst_nb[GGML_MAX_DIMS + 2];
-    dst_nb[0] = ggml_element_size(dst);
-    for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
-        dst_nb[i] = dst_nb[i-1] * dst_ne[i-1];
-    }
+    int64_t mask_ne[] = { mask->ne[0], dst->ne[1], mask->ne[2], 1, mask->ne[3], 1 };
+    size_t mask_nb[] = {
+        mask_nb[0] = mask->nb[0], mask_nb[1] = mask->nb[1], mask_nb[2] = mask->nb[2],
+        mask_nb[3] = mask->nb[2], mask_nb[4] = mask->nb[3], mask_nb[5] = mask->nb[3]
+    };
+
+    int64_t dst_ne[] = { dst->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], nr3 };
+    size_t dst_nb[] = {
+        dst_nb[0] = dst->nb[0], dst_nb[1] = dst->nb[1], dst_nb[2] = dst->nb[2],
+        dst_nb[3] = dst->nb[2], dst_nb[4] = dst->nb[3], dst_nb[5] = dst->nb[3]
+    };
 
     // slope is a 1 dim tensor, slope.ne2 == dst.ne2
-    int64_t slope_ne[] = {1, 1, mask->ne[2], nr2, 1, 1};
-    size_t slope_nb[GGML_MAX_DIMS + 2];
+    int64_t slope_ne[] = { 1, 1, mask->ne[2], nr2, 1, 1 };
+    size_t slope_nb[GGML_MAX_DIMS + 2];
     slope_nb[0] = sizeof(float);
-    for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
-        slope_nb[i] = slope_nb[i-1] * slope_ne[i-1];
+    for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
+        slope_nb[i] = slope_nb[i - 1] * slope_ne[i - 1];
     }
 
-    aclTensor* acl_slope = ggml_cann_create_tensor(slope_buffer, ACL_FLOAT, sizeof(float), slope_ne, slope_nb, GGML_MAX_DIMS + 2);
-    aclTensor* acl_mask = ggml_cann_create_tensor(mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst_ptr, ggml_cann_type_mapping(dst->type),
-                                                 ggml_type_size(dst->type), dst_ne, dst_nb, GGML_MAX_DIMS + 2);
-
+    aclTensor * acl_slope = ggml_cann_create_tensor(
+        slope_buffer, ACL_FLOAT, sizeof(float),
+        slope_ne, slope_nb, GGML_MAX_DIMS + 2);
+    aclTensor * acl_mask = ggml_cann_create_tensor(
+        mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
+    aclTensor * acl_dst = ggml_cann_create_tensor(
+        dst_ptr, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), dst_ne, dst_nb,
+        GGML_MAX_DIMS + 2);
+
     if (max_bias > 0.0f) {
-        int64_t bias_ne[] = {mask->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], 1};
-        size_t bias_nb[GGML_MAX_DIMS + 2];
+        int64_t bias_ne[] = { mask->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], 1 };
+        size_t bias_nb[GGML_MAX_DIMS + 2];
         bias_nb[0] = sizeof(float);
-        for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
-            bias_nb[i] = bias_nb[i-1] * bias_ne[i-1];
+        for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
+            bias_nb[i] = bias_nb[i - 1] * bias_ne[i - 1];
         }
-        aclTensor* bias_tensor = ggml_cann_create_tensor(bias_buffer, ACL_FLOAT, sizeof(float), bias_ne, bias_nb, GGML_MAX_DIMS + 2);
+        aclTensor * bias_tensor = ggml_cann_create_tensor(
+            bias_buffer, ACL_FLOAT, sizeof(float),
+            bias_ne, bias_nb, GGML_MAX_DIMS + 2);
 
         aclnn_mul(ctx, acl_slope, acl_mask, bias_tensor);
         aclnn_add(ctx, acl_dst, bias_tensor);
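
The 6-D `mask_ne` / `dst_ne` / `slope_ne` views above exist so that one broadcast multiply plus one add can apply a per-head slope to a mask that is shared across heads and batches; `nr2` and `nr3` count how many times `dst` repeats the mask along ne2 and ne3. Ignoring the strided-view bookkeeping, the computation being expressed is roughly the following CPU sketch (illustrative only, assuming a single 2-D mask shared by every head):

```cpp
// Illustrative reference for the broadcast bias applied above:
//   dst[h][i][j] += slope[h] * mask[i][j]
// The real code gets the same effect with 6-D strided views, so heads and
// batches that are integer multiples of the mask dimensions broadcast for free.
#include <cstdint>

static void add_alibi_ref(float * dst, const float * mask, const float * slope,
                          int64_t n_head, int64_t n_rows, int64_t n_cols) {
    for (int64_t h = 0; h < n_head; h++) {
        for (int64_t i = 0; i < n_rows; i++) {
            for (int64_t j = 0; j < n_cols; j++) {
                dst[(h * n_rows + i) * n_cols + j] += slope[h] * mask[i * n_cols + j];
            }
        }
    }
}
```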
@@ -1444,7 +1454,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, g
     ggml_cann_release_resources(ctx, acl_slope, acl_mask, acl_dst);
 }
 
-void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_cann_dup(ctx, dst);
 }
@@ -1462,31 +1472,31 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
  * @param acl_dst The destination tensor where the softmax results will be
  * stored.
  */
-static void aclnn_softmax(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-                          int64_t dim, aclTensor* acl_dst) {
+static void aclnn_softmax(ggml_backend_cann_context & ctx,
+    aclTensor * acl_src, int64_t dim, aclTensor * acl_dst) {
     GGML_CANN_CALL_ACLNN_OP(ctx, Softmax, acl_src, dim, acl_dst);
 }
 
-void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src0 = dst->src[0];
-    ggml_tensor* src1 = dst->src[1];  // mask
+void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];  // mask
 
-    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+    aclTensor * acl_src0 = ggml_cann_create_tensor(src0);
+    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
 
-    float scale    = 1.0f;
+    float scale = 1.0f;
     float max_bias = 0.0f;
 
-    memcpy(&scale,    (float *) dst->op_params + 0, sizeof(float));
-    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
+    memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
+    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
 
     // input mul scale
-    aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
+    aclScalar * acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
     ggml_cann_pool_alloc src_tensor_allocator(ctx.pool(), ggml_nbytes(src0));
     void * src_tensor_buffer = src_tensor_allocator.get();
     aclTensor* softmax_tensor = ggml_cann_create_tensor(
-        src_tensor_buffer, ggml_cann_type_mapping(src0->type), ggml_element_size(src0), src0->ne,
-        src0->nb, GGML_MAX_DIMS);
+        src_tensor_buffer, ggml_cann_type_mapping(src0->type),
+        ggml_element_size(src0), src0->ne, src0->nb, GGML_MAX_DIMS);
 
     aclnn_muls(ctx, acl_src0, scale, softmax_tensor, false);
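
Taken together with the scaling and ALiBi steps above, `ggml_cann_softmax` effectively computes `dst = softmax(src0 * scale + slope * mask)` row by row; the `dim == 3` passed to `aclnn_softmax` below is the innermost axis in this backend's reversed dimension order, i.e. the ggml row dimension. A per-row CPU sketch of that composite op (illustrative, not the backend API):

```cpp
// Illustrative per-row reference of the fused op being offloaded:
//   out = softmax(x * scale + bias)
// with the usual max subtraction for numerical stability.
#include <cmath>
#include <cstdint>

static void softmax_row_ref(float * out, const float * x, const float * bias,
                            int64_t n, float scale) {
    float maxv = -INFINITY;
    for (int64_t i = 0; i < n; i++) {
        out[i] = x[i] * scale + (bias ? bias[i] : 0.0f);
        maxv   = fmaxf(maxv, out[i]);
    }
    float sum = 0.0f;
    for (int64_t i = 0; i < n; i++) {
        out[i] = expf(out[i] - maxv);
        sum   += out[i];
    }
    for (int64_t i = 0; i < n; i++) {
        out[i] /= sum;
    }
}
```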
@@ -1496,8 +1506,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     }
     // softmax
     aclnn_softmax(ctx, softmax_tensor, 3, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src0, acl_dst,
-        acl_scale, softmax_tensor);
+    ggml_cann_release_resources(ctx, acl_src0, acl_dst, acl_scale, softmax_tensor);
 }
 
 /**