@@ -88,6 +88,15 @@ static int config_ag_gemm_kernel_sm80_tp4_nnodes1 = []() {
88
88
inst.add (make_gemm_meta (make_gemm_dtype_config (_FP16{}(),_FP16{}(),_FP16{}(),_FP16{}()),_Sm80{}(),_AGKernel{}(),_RCR{}(),_GemmV2{}()),make_runtime_config (8192 ,12288 ,12288 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (256l ,128l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
89
89
/// NVLink
90
90
inst.add (make_gemm_meta (make_gemm_dtype_config (_BF16{}(),_BF16{}(),_Void{}(),_BF16{}()),_Sm80{}(),_AGKernel{}(),_RRR{}(),_GemmV2{}()),make_runtime_config (8192 ,12288 ,12288 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (128l ,256l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
91
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_BF16{}(),_BF16{}(),_Void{}(),_BF16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RRR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,1792 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (64l ,128l ,64l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
92
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_FP16{}(),_FP16{}(),_Void{}(),_FP16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RRR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,1792 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (64l ,128l ,64l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
93
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_BF16{}(),_BF16{}(),_Void{}(),_BF16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RCR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,1792 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (64l ,128l ,64l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
94
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_FP16{}(),_FP16{}(),_Void{}(),_FP16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RCR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,1792 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (64l ,128l ,64l ),_GemmStreamK{}(),4 ,_RasterAlongM{}()));
95
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_BF16{}(),_BF16{}(),_Void{}(),_BF16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RRR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,7168 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (128l ,128l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
96
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_FP16{}(),_FP16{}(),_Void{}(),_FP16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RRR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,7168 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (128l ,128l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
97
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_BF16{}(),_BF16{}(),_Void{}(),_BF16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RCR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,7168 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkDP{}()),None{},cute::make_tuple (128l ,128l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongM{}()));
98
+ inst.add (make_gemm_meta (make_gemm_dtype_config (_FP16{}(),_FP16{}(),_Void{}(),_FP16{}(),_FP32{}()),_Sm80{}(),_AGKernel{}(),_RCR{}(),_GemmV2{}(),make_gemm_v2_meta (false ),None{}),make_runtime_config (2048 ,7168 ,4096 ,make_all_gather_runtime_config (4 ,1 ,0 )),make_gemm_hparams (make_gemm_v2_hparams (cute::make_tuple (64l ,64l ,32l ),cute::make_tuple (16l ,8l ,16l ),_StreamkSK{}()),None{},cute::make_tuple (256l ,128l ,32l ),_GemmStreamK{}(),3 ,_RasterAlongN{}()));
99
+
91
100
return 0 ;
92
101
}();
93
102
}
0 commit comments