@@ -386,82 +386,6 @@ std::vector<int64_t> GetLoopStrides(const ir::Expr& body) {
386
386
return loop_strides;
387
387
}
388
388
389
- bool GetCanApplyGridReduce (const std::vector<ir::Expr>& op_compute_bodies,
390
- const std::vector<int64_t >& reduce_axis) {
391
- // Names of tensors that are downstream of reduce.
392
- // A tensor is downstream of reduce either if it is produced by a reduce, or
393
- // if it has data dependency on another tensor that is downstream of reduce.
394
- std::unordered_set<std::string> reduce_downstream_tensor_names;
395
-
396
- const auto IsReduceDownstream = [&](const ir::Expr& expr_block) {
397
- for (auto & expr_load : GetRValueLoads (expr_block)) {
398
- std::string load_tensor_name = expr_load.As <ir::Load>()->name ();
399
- if (reduce_downstream_tensor_names.count (load_tensor_name) > 0 ) {
400
- return true ;
401
- }
402
- }
403
- return false ;
404
- };
405
-
406
- const auto AddReduceDownstream = [&](const ir::Expr& expr_block) {
407
- auto expr_store = analyzer::GetStoreOfSBlock (expr_block);
408
- std::string store_tensor_name = expr_store.As <ir::Store>()->name ();
409
- reduce_downstream_tensor_names.insert (store_tensor_name);
410
- };
411
-
412
- const auto CheckOutputHasReduceAxis = [&](const ir::Expr& body,
413
- const ir::Expr& expr_block) {
414
- std::vector<ir::Var> all_loop_vars = GetAllForIters (body);
415
- std::unordered_set<std::string> reduce_loop_vars;
416
- for (int64_t axis : reduce_axis) {
417
- reduce_loop_vars.insert (all_loop_vars[axis]->name );
418
- }
419
-
420
- std::unordered_set<std::string> reduce_iter_vars;
421
- auto * block = expr_block.As <ir::ScheduleBlockRealize>();
422
- auto & iter_vars = block->schedule_block .As <ir::ScheduleBlock>()->iter_vars ;
423
- for (int i = 0 ; i < iter_vars.size (); i++) {
424
- if (block->iter_values [i].is_var () &&
425
- reduce_loop_vars.count (block->iter_values [i].as_var ()->name ) > 0 ) {
426
- reduce_iter_vars.insert (iter_vars[i]->name );
427
- }
428
- }
429
-
430
- // The result is true if the indices of the output tensor contain any
431
- // reduce iter vars.
432
- auto expr_store = analyzer::GetStoreOfSBlock (expr_block);
433
- for (auto & index_expr : expr_store.As <ir::Store>()->indices ) {
434
- if (index_expr.is_var () &&
435
- reduce_iter_vars.count (index_expr.as_var_ref ()->name ) > 0 ) {
436
- return true ;
437
- }
438
- }
439
- return false ;
440
- };
441
-
442
- for (const auto & body : op_compute_bodies) {
443
- ir::Expr expr_block =
444
- (ChildScheduleBlockRealizes * ScheduleBlockRealizeIsNotInit)
445
- .GetSingle (body);
446
- bool is_reduce = analyzer::IsReductionSBlock (expr_block);
447
- bool is_reduce_downstream = IsReduceDownstream (expr_block);
448
- bool output_has_reduce_axis = CheckOutputHasReduceAxis (body, expr_block);
449
-
450
- if (is_reduce_downstream || is_reduce) {
451
- AddReduceDownstream (expr_block);
452
- }
453
-
454
- // When a block is downstream of reduce, its loop iters shouldn't contain
455
- // any reduce axis. Otherwise, it broadcasts the result of reduce. If this
456
- // is the case, we cannot apply grid reduce.
457
- if (is_reduce_downstream && (is_reduce || output_has_reduce_axis)) {
458
- VLOG (4 ) << " grid reduce is prohibited by block: " << expr_block;
459
- return false ;
460
- }
461
- }
462
- return true ;
463
- }
464
-
465
389
GroupVectorizeInfo GetGroupVectorizeInfo (
466
390
const std::vector<ir::Expr>& op_compute_bodies,
467
391
const std::unordered_set<std::string>& group_args) {
0 commit comments