Skip to content

Commit e2d439e

Browse files
authored
Add tile_n=32 and tile_k=32 kernels in generator.py (PaddlePaddle#858)
1 parent 0435979 commit e2d439e

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

tools/library/scripts/generator.py

+14
Original file line number | Diff line number | Diff line change
@@ -1443,6 +1443,20 @@ def GenerateSM75_TensorOp_8816_TN(manifest, cuda_version):
1443 1443
TileDescription([ 64, 128, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1444 1444
TileDescription([128, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1445 1445
TileDescription([ 64, 64, 64], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1446+
TileDescription([ 256, 32, 64], 2, [4, 1, 1], math_inst, min_cc, max_cc),
1447+
TileDescription([ 128, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc),
1448+
TileDescription([ 64, 32, 64], 2, [2, 1, 1], math_inst, min_cc, max_cc),
1449+
1450+
TileDescription([256, 128, 32], 2, [4, 2, 1], math_inst, min_cc, max_cc),
1451+
TileDescription([128, 256, 32], 2, [2, 4, 1], math_inst, min_cc, max_cc),
1452+
TileDescription([128, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1453+
TileDescription([ 64, 256, 32], 2, [1, 4, 1], math_inst, min_cc, max_cc),
1454+
TileDescription([256, 64, 32], 2, [4, 1, 1], math_inst, min_cc, max_cc),
1455+
TileDescription([ 64, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1456+
TileDescription([128, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1457+
TileDescription([ 64, 64, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc),
1458+
TileDescription([ 128, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc),
1459+
TileDescription([ 64, 32, 32], 2, [2, 1, 1], math_inst, min_cc, max_cc),
1446 1460
]
1447 1461

1448 1462
data_type = [

0 commit comments

Comments
 (0)