@@ -24,15 +24,16 @@ template <typename T, typename Context>
 void ContiguousKernel(const Context& dev_ctx,
                       const DenseTensor& input,
                       DenseTensor* out) {
-  if (out->numel() == 0) {
-    dev_ctx.template Alloc<T>(out);
-    return;
-  }
   phi::DenseTensorMeta meta = input.meta();
   meta.strides = meta.calc_strides(meta.dims);
   meta.offset = 0;
   out->set_meta(meta);

+  if (out->numel() == 0) {
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
+
   // use XPUCopyTypeTrait to deal with double and int16_t copy instead of
   // XPUTypeTrait
   using XPUType = typename XPUCopyTypeTrait<T>::Type;
@@ -59,15 +60,17 @@ template <>
 void ContiguousKernel<phi::dtype::complex<float>, XPUContext>(
     const XPUContext& dev_ctx, const DenseTensor& input, DenseTensor* out) {
   using T = phi::dtype::complex<float>;
-  if (out->numel() == 0) {
-    dev_ctx.template Alloc<T>(out);
-    return;
-  }
+
   phi::DenseTensorMeta meta = input.meta();
   meta.strides = meta.calc_strides(meta.dims);
   meta.offset = 0;
   out->set_meta(meta);

+  if (out->numel() == 0) {
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
+
   // The current complex number implementation uses separate real/imaginary
   // parts, resulting in redundant operations and performance
   // penalties. Optimization should address this in future iterations.
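
The substance of both hunks is ordering: `out->set_meta(meta)` now runs before the zero-numel early return, presumably so that an empty output still leaves the kernel with contiguous strides and a zero offset rather than whatever metadata it carried in. Below is a minimal, self-contained C++ sketch of that pattern; the `Meta`, `CalcStrides`, and `Contiguous` names are simplified stand-ins for the PHI types, not the real API.

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Stand-in for phi::DenseTensorMeta: dims, strides, and a storage offset.
struct Meta {
  std::vector<int64_t> dims;
  std::vector<int64_t> strides;
  int64_t offset = 0;
};

// Row-major contiguous strides for the given dims (assumed semantics of
// meta.calc_strides; a zero-sized dim simply propagates a zero stride).
std::vector<int64_t> CalcStrides(const std::vector<int64_t>& dims) {
  std::vector<int64_t> strides(dims.size(), 1);
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * dims[i + 1];
  }
  return strides;
}

int64_t Numel(const std::vector<int64_t>& dims) {
  return std::accumulate(
      dims.begin(), dims.end(), int64_t{1}, std::multiplies<int64_t>());
}

// Mirrors the reordered kernel: metadata is finalized before the
// zero-element early return, so an empty output is still well-formed.
void Contiguous(const Meta& input, Meta* out) {
  Meta meta = input;
  meta.strides = CalcStrides(meta.dims);
  meta.offset = 0;
  *out = meta;  // corresponds to out->set_meta(meta)

  if (Numel(out->dims) == 0) {
    // The real kernel also allocates here via dev_ctx.template Alloc<T>(out).
    return;
  }

  // ... the strided copy into the contiguous layout would run here ...
}

int main() {
  // A zero-element input with non-contiguous strides and a nonzero offset.
  Meta in{{2, 0, 3}, {1, 2, 6}, /*offset=*/5};
  Meta out;
  Contiguous(in, &out);
  // Prints "0 3 1 offset=0": contiguous strides for dims 2x0x3, offset reset.
  std::cout << out.strides[0] << ' ' << out.strides[1] << ' '
            << out.strides[2] << " offset=" << out.offset << '\n';
}
```

Under the old ordering, the zero-numel path returned before `set_meta`, so callers inspecting an empty output would have seen the stale dims/strides/offset it carried in.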