PaddlePaddle
diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc
Lines changed: 19 additions & 0 deletions
diff --git a/paddle/phi/kernels/cpu/nextafter_kernel.cc b/paddle/phi/kernels/cpu/nextafter_kernel.cc
Lines changed: 0 additions & 22 deletions
diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h
Lines changed: 6 additions & 0 deletions
diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h
Lines changed: 51 additions & 0 deletions
diff --git a/paddle/phi/kernels/gpu/nextafter_kernel.cu b/paddle/phi/kernels/gpu/nextafter_kernel.cu
Lines changed: 0 additions & 22 deletions
diff --git a/paddle/phi/kernels/impl/nextafter_kernel_impl.h b/paddle/phi/kernels/impl/nextafter_kernel_impl.h
Lines changed: 0 additions & 93 deletions
diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu
Lines changed: 14 additions & 0 deletions
diff --git a/paddle/phi/kernels/nextafter_kernel.h b/paddle/phi/kernels/nextafter_kernel.h
Lines changed: 0 additions & 28 deletions
diff --git a/test/legacy_test/test_nextafter_op.py b/test/legacy_test/test_nextafter_op.py
Lines changed: 37 additions & 2 deletions
@@ -106,6 +106,23 @@ void CopySignKernel(const Context& dev_ctx,
   }
 }
 
+// Nextafter kernel built on funcs::ElementwiseCompute: allocates `out` for
+// element type T and fills it by applying a nextafter functor to `x` and `y`.
+//
+// The functor is picked by comparing the ranks of the two inputs: when `x`
+// has fewer dimensions than `y`, the argument-swapping
+// InverseNextafterFunctor is used; otherwise the plain NextafterFunctor is.
+// NOTE(review): presumably ElementwiseCompute hands the operands to the
+// functor in swapped order in the lower-rank-x case, which the inverse
+// functor undoes -- confirm against ElementwiseCompute.
+template <typename T, typename Context>
+void NextafterKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     DenseTensor* out) {
+  dev_ctx.template Alloc<T>(out);
+
+  using ForwardOp = funcs::NextafterFunctor<T>;
+  using SwappedOp = funcs::InverseNextafterFunctor<T>;
+
+  const bool x_rank_is_lower = x.dims().size() < y.dims().size();
+  if (x_rank_is_lower) {
+    funcs::ElementwiseCompute<SwappedOp, T>(dev_ctx, x, y, SwappedOp(), out);
+    return;
+  }
+  funcs::ElementwiseCompute<ForwardOp, T>(dev_ctx, x, y, ForwardOp(), out);
+}
+
 }  // namespace phi
 
 using complex64 = ::phi::dtype::complex<float>;
@@ -193,3 +210,5 @@ PD_REGISTER_KERNEL(copysign,
                    double,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
+PD_REGISTER_KERNEL(  // registers phi::NextafterKernel as "nextafter" for CPU, float and double only
+    nextafter, CPU, ALL_LAYOUT, phi::NextafterKernel, float, double) {}
@@ -73,6 +73,12 @@ void CopySignKernel(const Context& dev_ctx,
                     const DenseTensor& y,
                     DenseTensor* out);
 
+template <typename T, typename Context>  // T: element type; Context: device context type
+void NextafterKernel(const Context& dev_ctx,  // device context used by the backend implementation
+                     const DenseTensor& x,    // first input tensor
+                     const DenseTensor& y,    // second input tensor
+                     DenseTensor* out);       // output tensor; allocated and written by the kernel
+
 template <typename T, typename Context>
 DenseTensor Maximum(const Context& dev_ctx,
                     const DenseTensor& x,
 
@@ -1174,5 +1174,56 @@ struct InverseCopySignFunctor {
   }
 };
 
+// Binary functor returning std::nextafter(x, y).
+//
+// Primary template: both operands are converted to float, std::nextafter is
+// evaluated on the float values, and the result is converted back to T.
+template <typename T, typename Enable = void>
+struct NextafterFunctor {
+  inline HOSTDEVICE T operator()(const T x, const T y) const {
+    const float from = static_cast<float>(x);
+    const float toward = static_cast<float>(y);
+    return static_cast<T>(std::nextafter(from, toward));
+  }
+};
+
+// T == double: std::nextafter is called on the operands directly.
+template <typename T>
+struct NextafterFunctor<
+    T,
+    std::enable_if_t<std::is_same<T, double>::value>> {
+  inline HOSTDEVICE T operator()(const T x, const T y) const {
+    return std::nextafter(x, y);
+  }
+};
+
+// Integral T: operands are converted to double and the result is returned as
+// double (not T).
+template <typename T>
+struct NextafterFunctor<T, std::enable_if_t<std::is_integral<T>::value>> {
+  inline HOSTDEVICE double operator()(const T x, const T y) const {
+    const double from = static_cast<double>(x);
+    const double toward = static_cast<double>(y);
+    return std::nextafter(from, toward);
+  }
+};
+
+// Argument-swapped counterpart of NextafterFunctor: called as (x, y), it
+// returns std::nextafter(y, x).
+//
+// Primary template: both operands are converted to float, std::nextafter is
+// evaluated on the float values, and the result is converted back to T.
+template <typename T, typename Enable = void>
+struct InverseNextafterFunctor {
+  inline HOSTDEVICE T operator()(const T x, const T y) const {
+    const float from = static_cast<float>(y);
+    const float toward = static_cast<float>(x);
+    return static_cast<T>(std::nextafter(from, toward));
+  }
+};
+
+// T == double: std::nextafter is called on the swapped operands directly.
+template <typename T>
+struct InverseNextafterFunctor<
+    T,
+    std::enable_if_t<std::is_same<T, double>::value>> {
+  inline HOSTDEVICE T operator()(const T x, const T y) const {
+    return std::nextafter(y, x);
+  }
+};
+
+// Integral T: operands are converted to double and the result is returned as
+// double (not T).
+template <typename T>
+struct InverseNextafterFunctor<
+    T,
+    std::enable_if_t<std::is_integral<T>::value>> {
+  inline HOSTDEVICE double operator()(const T x, const T y) const {
+    const double from = static_cast<double>(y);
+    const double toward = static_cast<double>(x);
+    return std::nextafter(from, toward);
+  }
+};
+
 }  // namespace funcs
 }  // namespace phi
@@ -196,6 +196,18 @@ void CopySignKernel(const Context& dev_ctx,
       dev_ctx, inputs, &outputs, funcs::CopySignFunctor<T>());
 }
 
+// Nextafter kernel built on funcs::BroadcastKernel: allocates `out` for
+// element type T, then applies NextafterFunctor<T> over the inputs {x, y}.
+template <typename T, typename Context>
+void NextafterKernel(const Context& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     DenseTensor* out) {
+  dev_ctx.template Alloc<T>(out);
+
+  // BroadcastKernel takes its operands as pointer lists.
+  std::vector<const DenseTensor*> ins{&x, &y};
+  std::vector<DenseTensor*> outs{out};
+  funcs::BroadcastKernel<T>(dev_ctx, ins, &outs, funcs::NextafterFunctor<T>());
+}
+
 }  // namespace phi
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -269,6 +281,8 @@ PD_REGISTER_KERNEL(copysign,
                    double,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
+PD_REGISTER_KERNEL(  // registers phi::NextafterKernel as "nextafter" for GPU, float and double only
+    nextafter, GPU, ALL_LAYOUT, phi::NextafterKernel, float, double) {}
 
 #endif
 
 
@@ -95,9 +95,10 @@ def setUp(self):
         self.op_type = "nextafter"
         self.python_api = paddle.nextafter
         self.init_dtype()
+        self.init_shape()
 
-        x = np.array([1, 2]).astype(self.dtype)
-        y = np.array([2, 1]).astype(self.dtype)
+        x = np.random.rand(*self.x_shape).astype(self.dtype)
+        y = np.random.rand(*self.y_shape).astype(self.dtype)
         out = np.nextafter(x, y)
         self.inputs = {'x': x, 'y': y}
         self.outputs = {'out': out}
@@ -108,11 +109,45 @@ def test_check_output(self):
     def init_dtype(self):
         self.dtype = np.float64  # default element type; subclasses override this for float32
 
+    def init_shape(self):
+        self.x_shape = (2,)
+        self.y_shape = (2,)
+
 
 class TestNextafterOPFP32(TestNextafterOP):
     def init_dtype(self):
         self.dtype = np.float32  # only the dtype differs from the base case
 
 
+class TestNextafterOPFP32Case1(TestNextafterOP):
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def init_shape(self):
+        self.x_shape = (5,)
+        self.y_shape = (2, 3, 4, 5)
+
+
+class TestNextafterOPFP32Case2(TestNextafterOP):
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def init_shape(self):
+        self.x_shape = (2, 3, 4, 5)
+        self.y_shape = (1,)
+
+
+class TestNextafterOPCase1(TestNextafterOP):
+    def init_shape(self):
+        self.x_shape = (5,)
+        self.y_shape = (2, 3, 4, 5)
+
+
+class TestNextafterOPCase2(TestNextafterOP):
+    def init_shape(self):
+        self.x_shape = (2, 3, 4, 5)
+        self.y_shape = (1,)
+
+
 if __name__ == "__main__":
     unittest.main()