【CINN】Optimize use of simplify #71321

Merged · 3 commits · Mar 3, 2025
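
Summary of the change, as reflected in the diff: call sites of the in-place `optim::Simplify(Expr*)` are migrated to the value-returning `optim::ArithSimplify(Expr)`, `optim::SimplifyBlocks` is renamed to `optim::SimplifyUnitBlock` (still pointer-based), and the `kIrNodeTyReprs` string table in `ir_base.h` is reordered to match the enum. A minimal sketch of the two call-site styles, using toy stand-ins for the CINN types rather than the real headers:

```cpp
#include <iostream>
#include <string>

// Toy stand-in for cinn::ir::Expr, only to contrast the two API styles.
struct Expr {
  std::string repr;
};

// Old style (like optim::Simplify): mutate the argument through a pointer.
void Simplify(Expr* e) { e->repr = "fold(" + e->repr + ")"; }

// New style (like optim::ArithSimplify): return the simplified value; the
// caller re-assigns explicitly, which keeps the rewrite visible.
Expr ArithSimplify(const Expr& e) { return Expr{"fold(" + e.repr + ")"}; }

int main() {
  Expr a{"x * 1 + 0"};
  Simplify(&a);          // hidden mutation of the caller's object
  Expr b{"x * 1 + 0"};
  b = ArithSimplify(b);  // the assignment makes the rewrite explicit
  std::cout << a.repr << "\n" << b.repr << "\n";
}
```

The value-returning form also removes the manual copies that several call sites (e.g. in `lang/compute.cc` below) made purely to satisfy the pointer signature.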
4 changes: 2 additions & 2 deletions paddle/cinn/backends/codegen_c.cc
@@ -170,7 +170,7 @@ void CodeGenC::Visit(const ir::Mul *op) { IrPrinter::Visit(op); }
 void CodeGenC::Visit(const ir::Div *op) { IrPrinter::Visit(op); }
 void CodeGenC::Visit(const ir::Mod *op) {
   auto copied = op->b();
-  optim::Simplify(&copied);
+  copied = optim::ArithSimplify(copied);
   if (copied.is_constant()) {
     int temp = static_cast<int>(copied.get_constant());
     if ((temp & (temp - 1)) == 0) {
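
Context for this hunk: the `Mod` visitor simplifies the divisor first so a constant modulus can be detected; `(temp & (temp - 1)) == 0` is the standard power-of-two test, the case where `a % m` can be lowered to a cheap bitwise AND. A self-contained illustration (not CINN code):

```cpp
#include <cassert>

// A positive power of two has exactly one set bit, so clearing the lowest
// set bit with n & (n - 1) yields zero.
bool IsPowerOfTwo(int n) { return n > 0 && (n & (n - 1)) == 0; }

int main() {
  assert(IsPowerOfTwo(8) && !IsPowerOfTwo(12));
  // For a power-of-two modulus m, a % m equals a & (m - 1) for a >= 0.
  int a = 29;
  assert(a % 8 == (a & 7));
}
```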
@@ -891,7 +891,7 @@ void CodeGenC::Visit(const ir::_LoweredFunc_ *op) {

   Expr func_body = ir::Block::Make(new_body);

-  optim::SimplifyBlocks(&func_body);
+  optim::SimplifyUnitBlock(&func_body);

   IrPrinter::Visit(func_body);
 }
6 changes: 3 additions & 3 deletions paddle/cinn/backends/codegen_gpu_dev.cc
@@ -183,7 +183,7 @@ void CodeGenGpuDev::Visit(const ir::_LoweredFunc_ *op) {
   ir::stmt::BlockRef func_body_block = ir::stmt::BlockRef(new_body_stmts);

   // Use ir_simplify when pass updated.
-  // optim::SimplifyBlocks(&func_body);
+  // optim::SimplifyUnitBlock(&func_body);
   // // Make sure that the function's body is wrapped by a block
   // if (!func_body.As<ir::Block>()) {
   //   func_body = ir::Block::Make({func_body});
@@ -320,7 +320,7 @@ void CodeGenGpuDev::PrintTempBufferCreation(const ir::Buffer &buffer) {
   for (int i = 0; i < buffer->shape.size(); i++) {
     buffer_size = buffer_size * buffer->shape[i];
   }
-  optim::Simplify(&buffer_size);
+  buffer_size = optim::ArithSimplify(buffer_size);
   bool has_symbolic_constant = false;
   ir::ir_utils::CollectIRNodes(buffer_size, [&](const Expr *x) {
     if (x->as_var()) {
@@ -352,7 +352,7 @@ void CodeGenGpuDev::PrintTempBufferCreation(const ir::Buffer &buffer) {
     int type_bytes = buffer->dtype.bytes();
     dyn_shared_mem_offset_ =
         dyn_shared_mem_offset_ + buffer_size * Expr(type_bytes);
-    optim::Simplify(&dyn_shared_mem_offset_);
+    dyn_shared_mem_offset_ = optim::ArithSimplify(dyn_shared_mem_offset_);
     VLOG(6) << "dyn_shared_mem_offset_ = " << dyn_shared_mem_offset_;
   } else if (buffer->memory_type == ir::MemoryType::GPULocal) {
     // print func of static allocation
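
For context, `dyn_shared_mem_offset_` accumulates `buffer_size * type_bytes` per buffer, i.e. it packs the temporaries back to back in one dynamic shared-memory region; simplifying the offset keeps the emitted expression compact. A plain-integer sketch of that layout arithmetic (illustrative names, not CINN's):

```cpp
#include <cstdio>

int main() {
  // Each temporary starts at the running byte offset, which then advances
  // by element_count * bytes_per_element, mirroring the accumulation of
  // dyn_shared_mem_offset_ above.
  struct { int elems; int type_bytes; } buffers[] = {{256, 4}, {128, 2}};
  int offset = 0;
  for (const auto& b : buffers) {
    std::printf("buffer at byte offset %d\n", offset);
    offset += b.elems * b.type_bytes;
  }
  std::printf("total dynamic shared memory: %d bytes\n", offset);  // 1280
}
```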
6 changes: 3 additions & 3 deletions paddle/cinn/backends/sycl/codegen_sycl_dev.cc
@@ -194,7 +194,7 @@ void CodeGenSyclDevice::PrintFunctionBody(const ir::_LoweredFunc_ *op) {
   APPEND_TO_NEW_BODY_STMTS(dealloc_temp_buffer_stmts);
   ir::stmt::BlockRef func_body_block = ir::stmt::BlockRef(new_body_stmts);
   // Use ir_simplify when pass updated.
-  // optim::SimplifyBlocks(&func_body);
+  // optim::SimplifyUnitBlock(&func_body);
   // // Make sure that the function's body is wrapped by a block
   // if (!func_body.As<ir::Block>()) {
   //   func_body = ir::Block::Make({func_body});
@@ -253,7 +253,7 @@ void CodeGenSyclDevice::PrintTempBufferCreation(const ir::Buffer &buffer) {
     for (int i = 0; i < buffer->shape.size(); i++) {
       buffer_size = buffer_size * buffer->shape[i];
     }
-    optim::Simplify(&buffer_size);
+    buffer_size = optim::ArithSimplify(buffer_size);
     IrPrinter::Visit(buffer_size);
     str_ += " ]";
   };
@@ -268,7 +268,7 @@ void CodeGenSyclDevice::PrintTempBufferCreation(const ir::Buffer &buffer) {
      for (int i = 0; i < buffer->shape.size(); i++) {
        buffer_size = buffer_size * buffer->shape[i];
      }
-     optim::Simplify(&buffer_size);
+     buffer_size = optim::ArithSimplify(buffer_size);
      IrPrinter::Visit(buffer_size);
      str_ += " ]>(item.get_group())";
      break;
8 changes: 4 additions & 4 deletions paddle/cinn/hlir/pe/ir_schedule_pe.cc
@@ -1300,9 +1300,9 @@ void IRCudaScheduleConv(ir::IRSchedule &ir_sch,  // NOLINT

   int n = output->shape[0].as_int32();
   int c = output->shape[1].as_int32();
-  optim::Simplify(&(output->shape[2]));
+  output->shape[2] = optim::ArithSimplify(output->shape[2]);
   int h = output->shape[2].as_int32();
-  optim::Simplify(&(output->shape[3]));
+  output->shape[3] = optim::ArithSimplify(output->shape[3]);
   int w = output->shape[3].as_int32();
   int rc = input_pad->shape[1].as_int32();
@@ -1480,8 +1480,8 @@ void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,  // NOLINT

   // stages[input_pad]->ComputeInline();

-  optim::Simplify(&(output->shape[2]));
-  optim::Simplify(&(output->shape[3]));
+  output->shape[2] = optim::ArithSimplify(output->shape[2]);
+  output->shape[3] = optim::ArithSimplify(output->shape[3]);

   VLOG(3) << "Begin IRCudaScheduleConv2 with expr : "
           << ir_sch.GetModule().GetExprs().at(0);
4 changes: 2 additions & 2 deletions paddle/cinn/ir/group_schedule/config/group_tile_util.cc
@@ -206,7 +206,7 @@ bool CheckTensorIsBroadcastAndContinuous(
   bool is_broadcast = false;
   for (int i = 0; i < indices.size(); ++i) {
     ir::Expr index = indices[i];
-    cinn::optim::Simplify(&index);
+    index = optim::ArithSimplify(index);
     if (index.is_constant() && index.get_constant() == 0) {
       is_broadcast = true;
       continue;
@@ -244,7 +244,7 @@ bool CheckTensorIsContinuous(
     const std::unordered_map<ir::Var, ir::Expr>& iter_var2value) {
   for (int i = 0; i < indices.size(); ++i) {
     ir::Expr index = indices[i];
-    cinn::optim::Simplify(&index);
+    index = optim::ArithSimplify(index);
     if (index.is_constant()) return false;
     if (!index.is_var()) return false;
     ir::Var iter_var = index.as_var_ref();
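
The reason both checks simplify the index first: an access like `A[i * 0]` is not syntactically a constant, and `A[i * 1 + 0]` is not syntactically a bare variable, but after arithmetic simplification they become exactly the constant `0` (broadcast) and the loop variable `i` (continuous). A toy affine-index illustration of the two cases (toy types, not CINN's):

```cpp
#include <cassert>

// Toy index of the form scale * i + offset, standing in for ir::Expr.
struct Affine {
  long scale, offset;
};

// After folding, scale == 0 means the index no longer depends on i.
bool IsBroadcast(const Affine& a) { return a.scale == 0 && a.offset == 0; }
// The identity access i * 1 + 0 folds to the bare loop variable.
bool IsBareLoopVar(const Affine& a) { return a.scale == 1 && a.offset == 0; }

int main() {
  assert(IsBroadcast({0, 0}));    // A[i * 0]     -> broadcast dimension
  assert(IsBareLoopVar({1, 0}));  // A[i * 1 + 0] -> continuous dimension
}
```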
8 changes: 6 additions & 2 deletions paddle/cinn/ir/ir_base.h
@@ -177,8 +177,12 @@ enum class StmtNodeTy { kUnk = -1, NODETY_FORALL_STMT(__m) };
 //! String representations for IrNodeTy.
 // @{
 #define __m(x__) #x__,
-const std::vector<std::string> kIrNodeTyReprs(
-    {NODETY_FORALL(__m) "IterSplit", "IterSum", "IterMark", "None"});
+const std::vector<std::string> kIrNodeTyReprs({"Module",
+                                               "LoweredFunc",
+                                               "IterSplit",
+                                               "IterSum",
+                                               "IterMark",
+                                               NODETY_FORALL(__m)});
 #undef __m
 // @}
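
The hunk above moves the hand-written names to the other side of the `NODETY_FORALL` expansion so that string lookup by enum value stays aligned with the enum's declaration order. A minimal X-macro sketch of the pattern (toy node list, not CINN's):

```cpp
#include <iostream>
#include <string>
#include <vector>

// With an X-macro, the enum and its name table expand from one list, so
// any extra entries must sit on the same side in both expansions.
#define NODETY_FORALL(m) m(IntImm) m(Add) m(Mul)

#define __m(x__) x__,
enum class IrNodeTy { Module, LoweredFunc, NODETY_FORALL(__m) };
#undef __m

#define __m(x__) #x__,
const std::vector<std::string> kReprs({"Module", "LoweredFunc",
                                       NODETY_FORALL(__m)});
#undef __m

int main() {
  // Indexing by enum value yields the matching name.
  std::cout << kReprs[static_cast<int>(IrNodeTy::Add)] << "\n";  // "Add"
}
```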

6 changes: 2 additions & 4 deletions paddle/cinn/lang/compute.cc
@@ -178,14 +178,12 @@ ir::Tensor Compute(const std::vector<Expr> &domain,

   // construct the shape.
   for (auto dim : domain) {
-    auto copied = dim;
-    optim::Simplify(&copied);
+    auto copied = optim::ArithSimplify(dim);
     domain_without_reduce_axis.push_back(copied);
   }

   for (auto dim : shape) {
-    auto copied = dim;
-    optim::Simplify(&copied);
+    auto copied = optim::ArithSimplify(dim);
     shape_simplified.push_back(copied);
   }

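With the value-returning API, the per-dimension cleanup no longer needs a mutable copy made just for the pointer signature; it would even admit a `std::transform` formulation, sketched here with toy stand-ins (an illustration, not what the PR does):

```cpp
#include <algorithm>
#include <iterator>
#include <vector>

struct Expr { int v; };
// Toy stand-in for optim::ArithSimplify.
Expr ArithSimplify(const Expr& e) { return e; }

int main() {
  std::vector<Expr> domain{{1}, {2}}, domain_without_reduce_axis;
  // One expression in, one simplified expression out.
  std::transform(domain.begin(), domain.end(),
                 std::back_inserter(domain_without_reduce_axis),
                 [](const Expr& d) { return ArithSimplify(d); });
}
```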
2 changes: 1 addition & 1 deletion paddle/cinn/lang/lower_impl.cc
@@ -384,7 +384,7 @@ std::vector<ir::LoweredFunc> LowerImpl::operator()() {

     if (support_ir_schedule_) {
       optim::TransformPolyForToFor(&func->body);
-      optim::SimplifyBlocks(&func->body);
+      optim::SimplifyUnitBlock(&func->body);
       func->body = ir::Block::Make({func->body});
       result.push_back(func);
       num_func++;