Skip to content

Commit 207e34a

Browse files
authored
[CINN] Explicitly set launch bounds for grid reduce (#72191)
1 parent e6194b1 commit 207e34a

File tree

1 file changed: 9 additions (+9), 0 deletions (-0)

1 file changed

+9
-0
lines changed

paddle/cinn/backends/codegen_gpu_dev.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,15 @@ void CodeGenGpuDev::PrintFunctionDeclaration(const ir::_LoweredFunc_ *op) {
244244
if (!has_symbol_in_thread_num) {
245245
str_ += "__launch_bounds__(";
246246
str_ += std::to_string(thread_num);
247+
// Explicitly set min_blocks_per_sm for grid reduce to prevent launch
248+
// failure.
249+
if (!op->temp_spaces.empty()) {
250+
int min_blocks_per_sm = 1024 / thread_num;
251+
if (min_blocks_per_sm > 1) {
252+
str_ += ", ";
253+
str_ += std::to_string(min_blocks_per_sm);
254+
}
255+
}
247256
str_ += ") ";
248257
}
249258
}

0 commit comments

Comments
 (0)