Skip to content

Commit a4acffd

Browse files
authored
[CINN]Fix 0 size ops bug (#71586)
* update * update * update * update * remove useless code * fix roi align grad kernel * [PHI]fix 0 size error (#71485) * update * update * update * update * remove useless code * fix bug
1 parent 7942f46 commit a4acffd

File tree

8 files changed

+70
-23
lines changed

8 files changed

+70
-23
lines changed

paddle/phi/infermeta/binary.cc

+26-18
Original file line numberDiff line numberDiff line change
@@ -2434,16 +2434,19 @@ void IndexSelectInferMeta(const MetaTensor& x,
24342434
"the dimension of Input(Index) is [%d].",
24352435
index_dim,
24362436
index_dim.size()));
2437-
2438-
PADDLE_ENFORCE_EQ(index_dim[0] != 0,
2439-
true,
2440-
common::errors::InvalidArgument(
2441-
"The length of Input(Index) can't be 0."));
2442-
2443-
auto output_dim = common::vectorize(input_dim);
24442437
if (dim < 0) {
24452438
dim += input_dim.size();
24462439
}
2440+
2441+
if (input_dim[dim] != 0) {
2442+
PADDLE_ENFORCE_EQ(index_dim[0] != 0,
2443+
true,
2444+
common::errors::InvalidArgument(
2445+
"The length of Input(Index) can't be 0."));
2446+
}
2447+
2448+
auto output_dim = common::vectorize(input_dim);
2449+
24472450
output_dim[dim] = index_dim[0];
24482451
output->set_dims(common::make_ddim(output_dim));
24492452
output->set_dtype(x.dtype());
@@ -3668,18 +3671,23 @@ void RepeatInterleaveWithTensorIndexInferMeta(const MetaTensor& x,
36683671
repeats_dim,
36693672
repeats_dim.size()));
36703673

3671-
PADDLE_ENFORCE_EQ(repeats_dim[0] != 0,
3672-
true,
3673-
common::errors::InvalidArgument(
3674-
"The length of Input(RepeatsTensor) can't be 0."));
3675-
PADDLE_ENFORCE_NE(out,
3676-
nullptr,
3677-
common::errors::InvalidArgument(
3678-
"repeat_interleave's output tensor can't be nullptr"));
3679-
if (dim < 0) {
3680-
dim += input_dim.size();
3674+
if (input_dim.size() == 1 && input_dim[0] == 0) {
3675+
output_dim[0] = 0;
3676+
} else {
3677+
PADDLE_ENFORCE_EQ(repeats_dim[0] != 0,
3678+
true,
3679+
common::errors::InvalidArgument(
3680+
"The length of Input(RepeatsTensor) can't be 0."));
3681+
PADDLE_ENFORCE_NE(
3682+
out,
3683+
nullptr,
3684+
common::errors::InvalidArgument(
3685+
"repeat_interleave's output tensor can't be nullptr"));
3686+
if (dim < 0) {
3687+
dim += input_dim.size();
3688+
}
3689+
output_dim[dim] = -1;
36813690
}
3682-
output_dim[dim] = -1;
36833691

36843692
out->set_dims(common::make_ddim(output_dim));
36853693
out->share_lod(x);

paddle/phi/kernels/gpu/concat_kernel.cu

+4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ void ConcatKernel(const Context& dev_ctx,
4545
out->Resize(out_dims);
4646
dev_ctx.template Alloc<T>(out);
4747

48+
if (out->numel() == 0) {
49+
return;
50+
}
51+
4852
// If axis is 0, the lod of the output is not the same as inputs.
4953
if (axis == 0 && x[0]->lod().size() > 0) {
5054
size_t lod_size_0 = x[0]->lod().size();

paddle/phi/kernels/gpu/masked_select_kernel.cu

+7
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ void MaskedSelectKernel(const Context& dev_ctx,
5353
DenseTensor mask_expand;
5454
DenseTensor x_expand;
5555

56+
if (x.numel() == 0 || mask.numel() == 0) {
57+
out->Resize({0});
58+
dev_ctx.template Alloc<T>(out);
59+
60+
return;
61+
}
62+
5663
auto expanded_size = funcs::MatrixGetBroadcastBatchPortion(
5764
common::vectorize(x.dims()), common::vectorize(mask.dims()));
5865

paddle/phi/kernels/gpu/roi_align_grad_kernel.cu

+15
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "paddle/phi/common/place.h"
2222
#include "paddle/phi/core/kernel_registry.h"
2323
#include "paddle/phi/kernels/empty_kernel.h"
24+
#include "paddle/phi/kernels/full_kernel.h"
2425
#include "paddle/phi/kernels/funcs/math_function.h"
2526

2627
namespace phi {
@@ -176,7 +177,16 @@ void RoiAlignGradKernel(const Context& dev_ctx,
176177
int sampling_ratio,
177178
bool aligned,
178179
DenseTensor* dx) {
180+
if (x.numel() == 0 || boxes.numel() == 0) {
181+
dev_ctx.template Alloc<T>(dx);
182+
183+
phi::FullKernel<T>(
184+
dev_ctx, common::vectorize(dx->dims()), 0.0, dx->dtype(), dx);
185+
return;
186+
}
187+
179188
int rois_num = boxes.dims()[0];
189+
180190
int channels = x.dims()[1];
181191
int height = x.dims()[2];
182192
int width = x.dims()[3];
@@ -185,6 +195,11 @@ void RoiAlignGradKernel(const Context& dev_ctx,
185195
return;
186196
}
187197

198+
// if (dx->numel() == 0) {
199+
// dev_ctx.template Alloc<T>(dx);
200+
201+
// return;
202+
// }
188203
DenseTensor box_batch_id_list;
189204
box_batch_id_list.Resize({rois_num});
190205
int* box_batch_size = dev_ctx.template HostAlloc<int>(&box_batch_id_list);

paddle/phi/kernels/gpu/roi_align_kernel.cu

+4
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ void RoiAlignKernel(const Context& dev_ctx,
145145
int sampling_ratio,
146146
bool aligned,
147147
DenseTensor* out) {
148+
if (out->numel() == 0) {
149+
dev_ctx.template Alloc<T>(out);
150+
return;
151+
}
148152
auto in_dims = x.dims();
149153
int batch_size = in_dims[0];
150154
int channels = in_dims[1];

paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h

+4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
120120
const DenseTensor& repeats_tensor,
121121
int dim,
122122
DenseTensor* out) {
123+
if (x.numel() == 0) {
124+
ctx.template Alloc<T>(out);
125+
return;
126+
}
123127
auto place = ctx.GetPlace();
124128
auto cpu_place = phi::CPUPlace();
125129

paddle/phi/kernels/kps/reduce_kernel.cu

+10-3
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,16 @@ void SumRawKernel(const Context& dev_ctx,
249249
}
250250
}
251251
out->Resize(phi::make_ddim(out_dims));
252-
dev_ctx.template Alloc<T>(out);
253-
FullKernel<T, Context>(
254-
dev_ctx, out_dims, 0, phi::CppTypeToDataType<T>::Type(), out);
252+
if (x.dtype() == phi::DataType::BOOL || x.dtype() == phi::DataType::INT32) {
253+
dev_ctx.template Alloc<int64_t>(out);
254+
FullKernel<int64_t, Context>(
255+
dev_ctx, out_dims, 0, phi::CppTypeToDataType<int64_t>::Type(), out);
256+
} else {
257+
dev_ctx.template Alloc<T>(out);
258+
FullKernel<T, Context>(
259+
dev_ctx, out_dims, 0, phi::CppTypeToDataType<T>::Type(), out);
260+
}
261+
255262
return;
256263
}
257264
if (x.numel() > std::numeric_limits<int32_t>::max()) {

python/paddle/tensor/manipulation.py

-2
Original file line numberDiff line numberDiff line change
@@ -1409,8 +1409,6 @@ def concat(
14091409
if in_dynamic_mode():
14101410
if isinstance(axis, Variable):
14111411
axis = axis.item(0)
1412-
if not isinstance(input, (Variable, paddle.pir.Value)):
1413-
input = [t for t in input if t.shape.count(0) == 0]
14141412
return _C_ops.concat(input, axis)
14151413
elif in_pir_mode():
14161414

0 commit comments

Comments (0)