【Paddle Tensor 规范化第二期】paddle.round supports int and complex (#72239)

fangfangssj · web-flow · commit 1c5dc6ebe3f1 · 2025-04-25T11:38:46.000+08:00
* add int and complex

* fix

* fix ci

* fix ci

* fix ci

* fix

* fix ci

* fix ci
diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -478,13 +478,23 @@ PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL_WITH_COMPLEX(log_double_grad,
 PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(hardswish_grad,
                                                 HardSwishGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(swish_grad, SwishGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(round_grad, RoundGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(floor_grad, FloorGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(ceil_grad, CeilGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(celu_grad, CeluGradKernel)
 PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(celu_double_grad,
                                           CeluDoubleGradKernel)
 
+PD_REGISTER_KERNEL(round_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::RoundGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
+
 PD_REGISTER_KERNEL(pow_grad,
                    CPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc
@@ -253,11 +253,21 @@ PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
 PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(hardswish, HardSwishKernel)
-PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
 PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
 PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
 PD_REGISTER_ACTIVATION_KERNEL(celu, CeluKernel)
 
+PD_REGISTER_KERNEL(round,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::RoundKernel,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
+
 PD_REGISTER_KERNEL(exp,
                    CPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h
@@ -2999,7 +2999,7 @@ struct FloorFunctor : public BaseActivationFunctor<T> {
 };
 
 // round(x) = [x]
-template <typename T>
+template <typename T, typename Enable = void>
 struct RoundFunctor : public BaseActivationFunctor<T> {
   int decimals;
 
@@ -3010,13 +3010,85 @@ struct RoundFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
     if (decimals == 0) {
-      out.device(d) = x.round();
+      out.device(d) = x.unaryExpr([](const T& val) {
+        return (std::isnan(val) || std::isinf(val)) ? val : std::rint(val);
+      });
+    } else if (decimals > 0) {
+      auto ten_pow_decimals = static_cast<T>(std::pow(10, decimals));
+      out.device(d) = x.unaryExpr([ten_pow_decimals](const T& val) {
+        return (std::isnan(val) || std::isinf(val))
+                   ? val
+                   : std::rint(val * ten_pow_decimals) / ten_pow_decimals;
+      });
+    } else {
+      auto ten_pow_decimals = static_cast<T>(std::pow(10, -decimals));
+      out.device(d) = x.unaryExpr([ten_pow_decimals](const T& val) {
+        return (std::isnan(val) || std::isinf(val))
+                   ? val
+                   : std::rint(val / ten_pow_decimals) * ten_pow_decimals;
+      });
+    }
+  }
+};
+
+template <typename T>
+struct RoundFunctor<T, std::enable_if_t<std::is_integral_v<T>>>
+    : public BaseActivationFunctor<T> {
+  int decimals;
+
+  std::vector<std::pair<const char*, int*>> GetAttrs() {
+    return {{"decimals", &decimals}};
+  }
+
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {
+    out.device(d) = x;
+  }
+};
+
+template <typename T>
+struct RoundFunctor<phi::dtype::complex<T>>
+    : public BaseActivationFunctor<phi::dtype::complex<T>> {
+  int decimals;
+
+  std::vector<std::pair<const char*, int*>> GetAttrs() {
+    return {{"decimals", &decimals}};
+  }
+
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {
+    using ComplexT = phi::dtype::complex<T>;
+
+    if (decimals == 0) {
+      out.device(d) = x.unaryExpr([](const ComplexT& c) {
+        T real = std::isnan(c.real) || std::isinf(c.real) ? c.real
+                                                          : std::rint(c.real);
+        T imag = std::isnan(c.imag) || std::isinf(c.imag) ? c.imag
+                                                          : std::rint(c.imag);
+        return ComplexT(real, imag);
+      });
     } else if (decimals > 0) {
       auto ten_pow_decimals = static_cast<T>(std::pow(10, decimals));
-      out.device(d) = (x * ten_pow_decimals).round() / ten_pow_decimals;
+      out.device(d) = x.unaryExpr([ten_pow_decimals](const ComplexT& c) {
+        T real = std::isnan(c.real) || std::isinf(c.real)
+                     ? c.real
+                     : std::rint(c.real * ten_pow_decimals) / ten_pow_decimals;
+        T imag = std::isnan(c.imag) || std::isinf(c.imag)
+                     ? c.imag
+                     : std::rint(c.imag * ten_pow_decimals) / ten_pow_decimals;
+        return ComplexT(real, imag);
+      });
     } else {
       auto ten_pow_decimals = static_cast<T>(std::pow(10, -decimals));
-      out.device(d) = (x / ten_pow_decimals).round() * ten_pow_decimals;
+      out.device(d) = x.unaryExpr([ten_pow_decimals](const ComplexT& c) {
+        T real = std::isnan(c.real) || std::isinf(c.real)
+                     ? c.real
+                     : std::rint(c.real / ten_pow_decimals) * ten_pow_decimals;
+        T imag = std::isnan(c.imag) || std::isinf(c.imag)
+                     ? c.imag
+                     : std::rint(c.imag / ten_pow_decimals) * ten_pow_decimals;
+        return ComplexT(real, imag);
+      });
     }
   }
 };
@@ -5318,7 +5390,7 @@ struct CudaFloorFunctor : public BaseActivationFunctor<T> {
   }
 };
 
-template <typename T>
+template <typename T, typename Enable = void>
 struct CudaRoundFunctor : public BaseActivationFunctor<T> {
   using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   int decimals;
@@ -5330,20 +5402,79 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> {
   __device__ __forceinline__ T operator()(const T arg_x) const {
     MPType x = static_cast<MPType>(arg_x);
 
+    if (isnan(x) || isinf(x)) return arg_x;
     if (decimals == 0) {
-      return static_cast<T>(round(x));
+      return static_cast<T>(std::rint(x));
     } else if (decimals > 0) {
-      float ten_pow_decimals = powf(10., decimals);
-      return static_cast<T>(round(x * static_cast<MPType>(ten_pow_decimals)) /
+      MPType ten_pow_decimals =
+          pow(static_cast<MPType>(10), static_cast<MPType>(decimals));
+      return static_cast<T>(rint(x * static_cast<MPType>(ten_pow_decimals)) /
                             ten_pow_decimals);
     } else {
-      float ten_pow_decimals = powf(10., -decimals);
-      return static_cast<T>(round(x / static_cast<MPType>(ten_pow_decimals)) *
+      MPType ten_pow_decimals =
+          pow(static_cast<MPType>(10), static_cast<MPType>(-decimals));
+      return static_cast<T>(rint(x / static_cast<MPType>(ten_pow_decimals)) *
                             ten_pow_decimals);
     }
   }
 };
 
+template <typename T>
+struct CudaRoundFunctor<T, std::enable_if_t<std::is_integral_v<T>>>
+    : public BaseActivationFunctor<T> {
+  int decimals;
+
+  std::vector<std::pair<const char*, int*>> GetAttrs() {
+    return {{"decimals", &decimals}};
+  }
+  // round(x) = x for integral types: the input is returned unchanged.
+  __device__ __forceinline__ T operator()(const T arg_x) const { return arg_x; }
+};
+
+template <typename T>
+struct CudaRoundFunctor<phi::dtype::complex<T>>
+    : public BaseActivationFunctor<phi::dtype::complex<T>> {
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+  int decimals;
+
+  std::vector<std::pair<const char*, int*>> GetAttrs() {
+    return {{"decimals", &decimals}};
+  }
+
+  __device__ __forceinline__ phi::dtype::complex<T> operator()(
+      const phi::dtype::complex<T> arg_x) const {
+    MPType real_part = static_cast<MPType>(arg_x.real);
+    MPType imag_part = static_cast<MPType>(arg_x.imag);
+    bool real_special = isnan(real_part) || isinf(real_part);
+    bool imag_special = isnan(imag_part) || isinf(imag_part);
+    MPType real, imag;
+
+    if (decimals == 0) {
+      real = real_special ? real_part : rint(real_part);
+      imag = imag_special ? imag_part : rint(imag_part);
+    } else if (decimals > 0) {
+      MPType ten_pow_decimals =
+          pow(static_cast<MPType>(10), static_cast<MPType>(decimals));
+      real = real_special
+                 ? real_part
+                 : rint(real_part * ten_pow_decimals) / ten_pow_decimals;
+      imag = imag_special
+                 ? imag_part
+                 : rint(imag_part * ten_pow_decimals) / ten_pow_decimals;
+    } else {
+      MPType ten_pow_decimals =
+          pow(static_cast<MPType>(10), static_cast<MPType>(-decimals));
+      real = real_special
+                 ? real_part
+                 : rint(real_part / ten_pow_decimals) * ten_pow_decimals;
+      imag = imag_special
+                 ? imag_part
+                 : rint(imag_part / ten_pow_decimals) * ten_pow_decimals;
+    }
+    return phi::dtype::complex<T>(static_cast<T>(real), static_cast<T>(imag));
+  }
+};
+
 // GradFunctor for ceil, floor and round
 template <typename T>
 struct CudaZeroGradFunctor : public BaseActivationFunctor<T> {
diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -547,12 +547,23 @@ PD_REGISTER_KERNEL(log_double_grad,
 PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(hardswish_grad,
                                                 HardSwishGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(swish_grad, SwishGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(round_grad, RoundGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(floor_grad, FloorGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(ceil_grad, CeilGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(celu_grad, CeluGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(celu_double_grad, CeluDoubleGradKernel)
 
+PD_REGISTER_KERNEL(round_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::RoundGradKernel,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
 PD_REGISTER_KERNEL(pow_grad,
                    GPU,
                    ALL_LAYOUT,
diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu
@@ -336,13 +336,24 @@ PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(logsigmoid, LogSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(hardswish, HardSwishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
-PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
 PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
 PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
 PD_REGISTER_ACTIVATION_KERNEL(celu, CeluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(selu, SeluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(logit, LogitCUDAKernel)
 
+PD_REGISTER_KERNEL(round,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::RoundKernel,
+                   int,
+                   int64_t,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
 PD_REGISTER_KERNEL(log,
                    GPU,
                    ALL_LAYOUT,
diff --git a/python/paddle/tensor/ops.py b/python/paddle/tensor/ops.py
@@ -810,7 +810,7 @@ def round(x: Tensor, decimals: int = 0, name: str | None = None) -> Tensor:
           out.data = [1., -1., 3., 1.]
 
     Args:
-        x (Tensor): Input of Round operator, an N-D Tensor, with data type bfloat16, float32, float64 or float16.
+        x (Tensor): Input of Round operator, an N-D Tensor, with data type bfloat16, int32, int64, float32, float64, float16, complex64 or complex128.
         decimals(int): Rounded decimal place (default: 0).
         name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
 
@@ -826,13 +826,25 @@ def round(x: Tensor, decimals: int = 0, name: str | None = None) -> Tensor:
             >>> out = paddle.round(x)
             >>> print(out)
             Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
-            [-1., -0.,  1.,  2.])
+            [-0., -0.,  1.,  2.])
     """
     if in_dynamic_or_pir_mode():
         return _C_ops.round(x, decimals)
     else:
         check_variable_and_dtype(
-            x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'round'
+            x,
+            'x',
+            [
+                'float16',
+                'uint16',
+                'int32',
+                'int64',
+                'float32',
+                'float64',
+                'complex64',
+                'complex128',
+            ],
+            'round',
         )
         helper = LayerHelper('round', **locals())
         attrs = {
diff --git a/test/legacy_test/test_round_op.py b/test/legacy_test/test_round_op.py