PaddlePaddle · HydrogenSulfate · May 15, 2025 · May 9, 2025 · May 12, 2025 · May 12, 2025
diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -298,6 +298,8 @@ XPUOpMap& get_kl3_ops() {
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::FLOAT16,
                      phi::DataType::BFLOAT16})},
+      {"complex", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"complex_grad", XPUKernelSet({phi::DataType::FLOAT32})},
       {"concat_grad",
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::FLOAT16,
@@ -801,6 +803,8 @@ XPUOpMap& get_kl3_ops() {
       {"huber_loss", XPUKernelSet({phi::DataType::FLOAT32})},
       {"kldiv_loss", XPUKernelSet({phi::DataType::FLOAT32})},
       {"kldiv_loss_grad", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"imag", XPUKernelSet({phi::DataType::COMPLEX64})},
+      {"imag_grad", XPUKernelSet({phi::DataType::COMPLEX64})},
       {"increment",
        XPUKernelSet({phi::DataType::FLOAT32,
                      phi::DataType::INT32,
@@ -1098,6 +1102,8 @@ XPUOpMap& get_kl3_ops() {
                      phi::DataType::INT64,
                      phi::DataType::FLOAT32,
                      phi::DataType::FLOAT64})},
+      {"real", XPUKernelSet({phi::DataType::COMPLEX64})},
+      {"real_grad", XPUKernelSet({phi::DataType::COMPLEX64})},
       {"reciprocal", XPUKernelSet({phi::DataType::FLOAT32})},
       {"reciprocal_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},

diff --git a/paddle/phi/kernels/xpu/complex_grad_kernel.cc b/paddle/phi/kernels/xpu/complex_grad_kernel.cc
@@ -0,0 +1,138 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/complex_grad_kernel.h"
+
+#include "fft/cuComplex.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/type_traits.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/expand_grad_kernel.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+// Forward declarations of routines whose definitions are not part of this
+// file; only the signatures below are visible here.
+// NOTE(review): presumably these are provided by the XPU FFT library that
+// also ships "fft/cuComplex.h" -- confirm against the link dependencies.
+namespace xfft_internal::xpu {
+// Used below with two float buffers (`real`, `imag`) of N elements each and a
+// float2 output buffer; the returned int is checked with
+// PADDLE_ENFORCE_XDNN_SUCCESS by every caller in this file.
+int combine_as_complex(int N, float* real, float* imag, float2* out);
+// Used below with a float2 input buffer and two float output buffers.
+// NOTE(review): "spilt" looks like a typo for "split", but the name has to
+// match whatever the external definition is called, so it is left as-is.
+int complex_spilt_float(int N, float2* in, float* real, float* imag);
+}  // namespace xfft_internal::xpu
+
+namespace phi {
+
+// Creates a fresh DenseTensor of the requested `shape`, allocates storage for
+// element type T through `ctx`, and writes `fill_value` into every element
+// with funcs::SetConstant. The tensor is returned by value.
+template <class T, class Context>
+static DenseTensor Fill(const Context& ctx,
+                        std::vector<int> shape,
+                        T fill_value) {
+  DenseTensor filled;
+  const auto dims = common::make_ddim(shape);
+  filled.Resize(dims);
+  ctx.template Alloc<T>(&filled);
+
+  funcs::SetConstant<Context, T> set_all;
+  set_all(ctx, &filled, fill_value);
+  return filled;
+}
+
+// Backward kernel for real(): allocates the complex gradient `dx` and fills it
+// by calling combine_as_complex with `dout` as the real component and a
+// zero-filled tensor of the same shape as the imaginary component.
+//
+// T is the complex element type of `dx`; `dout` is read as phi::dtype::Real<T>.
+template <typename T, typename Context>
+void RealGradKernel(const Context& dev_ctx,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  using RealT = phi::dtype::Real<T>;
+
+  const auto count = dout.numel();
+  // Ask for exactly `count` elements of T worth of bytes for the output.
+  const auto bytes = static_cast<size_t>(count * sizeof(T));
+  T* grad_out = dev_ctx.template Alloc<T>(dx, bytes);
+
+  // Zero tensor supplying the imaginary component.
+  DenseTensor zeros = Fill<RealT, Context>(
+      dev_ctx, common::vectorize<int>(dout.dims()), RealT(0.0));
+
+  // combine_as_complex is declared with non-const pointers, so constness has
+  // to be cast away on the read-only input.
+  RealT* upstream = const_cast<RealT*>(dout.data<RealT>());
+  int r = xfft_internal::xpu::combine_as_complex(
+      count,
+      upstream,
+      zeros.data<RealT>(),
+      reinterpret_cast<cuFloatComplex*>(grad_out));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "real_grad");
+}
+
+// Backward kernel for imag(): allocates the complex gradient `dx` and fills it
+// by calling combine_as_complex with a zero-filled tensor as the real
+// component and `dout` as the imaginary component.
+//
+// T is the complex element type of `dx`; `dout` is read as phi::dtype::Real<T>.
+template <typename T, typename Context>
+void ImagGradKernel(const Context& dev_ctx,
+                    const DenseTensor& dout,
+                    DenseTensor* dx) {
+  using RealT = phi::dtype::Real<T>;
+
+  const auto count = dout.numel();
+  // Ask for exactly `count` elements of T worth of bytes for the output.
+  const auto bytes = static_cast<size_t>(count * sizeof(T));
+  T* grad_out = dev_ctx.template Alloc<T>(dx, bytes);
+
+  // Zero tensor supplying the real component.
+  DenseTensor zeros = Fill<RealT, Context>(
+      dev_ctx, common::vectorize<int>(dout.dims()), RealT(0.0));
+
+  // combine_as_complex is declared with non-const pointers, so constness has
+  // to be cast away on the read-only input.
+  RealT* upstream = const_cast<RealT*>(dout.data<RealT>());
+  int r = xfft_internal::xpu::combine_as_complex(
+      count,
+      zeros.data<RealT>(),
+      upstream,
+      reinterpret_cast<cuFloatComplex*>(grad_out));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "imag_grad");
+}
+
+// Backward kernel for complex(x, y): separates the complex upstream gradient
+// `dout` into two real tensors with complex_spilt_float, then routes the first
+// one to `dx` and the second one to `dy`.
+//
+// For each requested gradient (non-null pointer):
+//   * if the operand's dims equal dout's dims, the gradient tensor shares the
+//     storage of the separated component;
+//   * otherwise ExpandGradKernel is called with the operand's own shape to
+//     produce the gradient from the separated component.
+template <typename T, typename Context>
+void ComplexGradKernel(const Context& dev_ctx,
+                       const DenseTensor& x,
+                       const DenseTensor& y,
+                       const DenseTensor& dout,
+                       DenseTensor* dx,
+                       DenseTensor* dy) {
+  using C = phi::dtype::complex<T>;
+  const auto count = dout.numel();
+
+  // Scratch tensors that receive the two components of dout.
+  DenseTensor grad_re;
+  DenseTensor grad_im;
+  grad_re.Resize(dout.dims());
+  grad_im.Resize(dout.dims());
+  T* re_ptr = dev_ctx.template Alloc<T>(&grad_re);
+  T* im_ptr = dev_ctx.template Alloc<T>(&grad_im);
+
+  // The routine is declared with a non-const input pointer.
+  C* dout_ptr = const_cast<C*>(dout.data<C>());
+  int r = xfft_internal::xpu::complex_spilt_float(
+      count, reinterpret_cast<cuFloatComplex*>(dout_ptr), re_ptr, im_ptr);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "complex_grad");
+
+  // Shared routing logic for both operands.
+  const auto route = [&dev_ctx, &dout](const DenseTensor& operand,
+                                       const DenseTensor& component,
+                                       DenseTensor* grad) {
+    if (!grad) {
+      return;
+    }
+    if (operand.dims() == dout.dims()) {
+      grad->ShareDataWith(component);
+      return;
+    }
+    ExpandGradKernel<T, Context>(dev_ctx,
+                                 operand,
+                                 component,
+                                 phi::IntArray(phi::vectorize(operand.dims())),
+                                 grad);
+  };
+
+  route(x, grad_re, dx);
+  route(y, grad_im, dy);
+}
+}  // namespace phi
+
+// imag_grad on XPU, registered for complex<float> only. ImagGradKernel reads
+// its tensor input (index 0, `dout`) as the real counterpart of T, so that
+// input's dtype is overridden with ToReal(kernel dtype).
+PD_REGISTER_KERNEL(imag_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::ImagGradKernel,
+                   phi::dtype::complex<float>) {
+  kernel->InputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
+}
+
+// real_grad on XPU, registered for complex<float> only. As above, input 0
+// (`dout`) is declared with the real counterpart of the kernel dtype.
+PD_REGISTER_KERNEL(real_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::RealGradKernel,
+                   phi::dtype::complex<float>) {
+  kernel->InputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
+}
+
+// complex_grad on XPU, registered for float only. ComplexGradKernel takes
+// (x, y, dout) and reads `dout` as complex<T>, so input 2 is declared with the
+// complex counterpart of the kernel dtype.
+PD_REGISTER_KERNEL(
+    complex_grad, XPU, ALL_LAYOUT, phi::ComplexGradKernel, float) {
+  kernel->InputAt(2).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));
+}
diff --git a/paddle/phi/kernels/xpu/complex_kernel.cc b/paddle/phi/kernels/xpu/complex_kernel.cc
@@ -0,0 +1,137 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/complex_kernel.h"
+
+#include <type_traits>
+
+#include "fft/cuComplex.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/type_traits.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/expand_kernel.h"
+#include "paddle/phi/kernels/funcs/common_infer_shape_functions.h"
+
+// Forward declarations of routines whose definitions are not part of this
+// file; only the signatures below are visible here.
+// NOTE(review): presumably these are provided by the XPU FFT library that
+// also ships "fft/cuComplex.h" -- confirm against the link dependencies.
+namespace xfft_internal::xpu {
+// Used by ComplexKernel with two float buffers and a float2 output buffer.
+int combine_as_complex(int N, float* real, float* imag, float2* out);
+// Used by RealKernel / ImagKernel with a float2 input buffer and two float
+// output buffers. NOTE(review): "spilt" looks like a typo for "split", but the
+// name has to match the external definition, so it is left as-is.
+int complex_spilt_float(int N, float2* in, float* real, float* imag);
+// Used by ConjKernel with float2 input and output buffers of N elements.
+int Conj(int N, float2* input, float2* output);
+}  // namespace xfft_internal::xpu
+
+namespace phi {
+// Element-wise conjugate on XPU: writes conj(x) into `out`.
+//
+// This kernel is registered for both float and phi::dtype::complex<float>.
+// xfft_internal::xpu::Conj is declared on float2 buffers, so handing it a
+// float tensor with N = numel() would make it work on 2 * numel() floats and
+// run past the end of both `x` and `out`. A real value is its own conjugate,
+// so the real instantiation just copies the input through instead.
+//
+// dev_ctx: XPU device context used for allocation and the copy.
+// x:       input tensor with element type T.
+// out:     output tensor; allocated here with element type T.
+template <typename T, typename Context>
+void ConjKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DenseTensor* out) {
+  dev_ctx.template Alloc<T>(out);
+  if constexpr (std::is_same_v<T, phi::dtype::complex<float>>) {
+    // The library routine is declared with non-const pointers.
+    int r = xfft_internal::xpu::Conj(
+        x.numel(),
+        reinterpret_cast<cuFloatComplex*>(const_cast<T*>(x.data<T>())),
+        reinterpret_cast<cuFloatComplex*>(out->data<T>()));
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "conj");
+  } else {
+    const T* src = x.data<T>();
+    T* dst = out->data<T>();
+    // Nothing to do when the output already aliases the input.
+    if (src != dst) {
+      int r = xpu::copy<T>(dev_ctx.x_context(), src, dst, x.numel());
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
+    }
+  }
+}
+
+// Forward kernel for real(): passes the complex input `x` to
+// complex_spilt_float with `out` as the `real` destination. The `imag`
+// destination is a temporary tensor that is dropped when the function returns.
+//
+// T is the complex element type of `x`; `out` holds phi::dtype::Real<T>.
+template <typename T, typename Context>
+void RealKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DenseTensor* out) {
+  using RealT = phi::dtype::Real<T>;
+
+  dev_ctx.template Alloc<RealT>(out);
+
+  // The routine always writes both components, so a throwaway buffer is
+  // needed for the one this kernel does not return.
+  phi::DenseTensor unused_imag;
+  unused_imag.Resize(x.dims());
+  dev_ctx.template Alloc<RealT>(&unused_imag);
+
+  // The routine is declared with a non-const input pointer.
+  T* src = const_cast<T*>(x.data<T>());
+  int r = xfft_internal::xpu::complex_spilt_float(
+      out->numel(),
+      reinterpret_cast<cuFloatComplex*>(src),
+      out->data<RealT>(),
+      unused_imag.data<RealT>());
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "real");
+}
+
+// Forward kernel for imag(): passes the complex input `x` to
+// complex_spilt_float with `out` as the `imag` destination. The `real`
+// destination is a temporary tensor that is dropped when the function returns.
+//
+// T is the complex element type of `x`; `out` holds phi::dtype::Real<T>.
+template <typename T, typename Context>
+void ImagKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DenseTensor* out) {
+  using RealT = phi::dtype::Real<T>;
+
+  dev_ctx.template Alloc<RealT>(out);
+
+  // The routine always writes both components, so a throwaway buffer is
+  // needed for the one this kernel does not return.
+  phi::DenseTensor unused_real;
+  unused_real.Resize(x.dims());
+  dev_ctx.template Alloc<RealT>(&unused_real);
+
+  // The routine is declared with a non-const input pointer.
+  T* src = const_cast<T*>(x.data<T>());
+  int r = xfft_internal::xpu::complex_spilt_float(
+      out->numel(),
+      reinterpret_cast<cuFloatComplex*>(src),
+      unused_real.data<RealT>(),
+      out->data<RealT>());
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "imag");
+}
+
+// Forward kernel for complex(x, y): builds a complex tensor by passing `x` as
+// the real buffer and `y` as the imaginary buffer to combine_as_complex.
+//
+// The output shape is BroadcastTwoDims(x.dims(), y.dims()). An operand whose
+// dims already equal that shape is used in place; otherwise it is first
+// expanded into a temporary tensor with ExpandKernel.
+template <typename T, typename Context>
+void ComplexKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   DenseTensor* out) {
+  using C = phi::dtype::complex<T>;
+
+  const auto out_dims = phi::funcs::BroadcastTwoDims(x.dims(), y.dims());
+  const std::vector<int64_t> out_shape = phi::vectorize(out_dims);
+
+  // Temporaries that are only populated when an operand needs expanding.
+  DenseTensor expanded_x;
+  DenseTensor expanded_y;
+
+  // Returns a pointer to `src` data laid out with the output shape, expanding
+  // into `scratch` when the shapes differ.
+  const auto resolve = [&](const DenseTensor& src,
+                           DenseTensor* scratch) -> T* {
+    if (src.dims() == out_dims) {
+      // combine_as_complex is declared with non-const pointers.
+      return const_cast<T*>(src.data<T>());
+    }
+    scratch->Resize(out_dims);
+    dev_ctx.template Alloc<T>(scratch);
+    ExpandKernel<T, Context>(dev_ctx, src, phi::IntArray(out_shape), scratch);
+    return scratch->data<T>();
+  };
+
+  T* x_data = resolve(x, &expanded_x);
+  T* y_data = resolve(y, &expanded_y);
+
+  dev_ctx.template Alloc<C>(out);
+  int r = xfft_internal::xpu::combine_as_complex(
+      out->numel(),
+      x_data,
+      y_data,
+      reinterpret_cast<cuFloatComplex*>(out->data<C>()));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "complex");
+}
+}  // namespace phi
+
+// conj on XPU, registered for float and complex<float>. No dtype override is
+// applied: input and output share the kernel dtype.
+PD_REGISTER_KERNEL(
+    conj, XPU, ALL_LAYOUT, phi::ConjKernel, float, phi::dtype::complex<float>) {
+}
+
+// real on XPU, registered for complex<float> only. RealKernel allocates its
+// output as phi::dtype::Real<T>, so output 0 is declared with the real
+// counterpart of the kernel dtype.
+PD_REGISTER_KERNEL(
+    real, XPU, ALL_LAYOUT, phi::RealKernel, phi::dtype::complex<float>) {
+  kernel->OutputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
+}
+
+// imag on XPU, registered for complex<float> only. As above, output 0 is
+// declared with the real counterpart of the kernel dtype.
+PD_REGISTER_KERNEL(
+    imag, XPU, ALL_LAYOUT, phi::ImagKernel, phi::dtype::complex<float>) {
+  kernel->OutputAt(0).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
+}
+
+// complex on XPU, registered for float only. ComplexKernel allocates its
+// output as phi::dtype::complex<T>, so output 0 is declared with the complex
+// counterpart of the kernel dtype.
+PD_REGISTER_KERNEL(complex, XPU, ALL_LAYOUT, phi::ComplexKernel, float) {
+  kernel->OutputAt(0).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));
+}