Skip to content

register fluid kernels to phi [part 6.3] #52880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion paddle/fluid/operators/hash_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,6 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;

REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel<int>, ops::HashKernel<int64_t>);

// Register the hash op's CPU kernel with the PHI struct-kernel registry for
// int and int64_t element types (migrated from REGISTER_OP_CPU_KERNEL; the
// macro expects ops::HashKernel to take <T, DeviceContext> template params).
PD_REGISTER_STRUCT_KERNEL(
hash, CPU, ALL_LAYOUT, ops::HashKernel, int, int64_t) {}
2 changes: 1 addition & 1 deletion paddle/fluid/operators/hash_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ inline void HashOutputSize(const framework::DDim& in_dims,
out_dims.emplace_back(1);
}

template <typename T>
template <typename T, typename DeviceContext>
class HashKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
Expand Down
21 changes: 12 additions & 9 deletions paddle/fluid/operators/hinge_loss_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,15 @@ REGISTER_OPERATOR(hinge_loss,
ops::HingeLossGradOpMaker<paddle::framework::OpDesc>,
ops::HingeLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp);
REGISTER_OP_CPU_KERNEL(hinge_loss,
ops::HingeLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(hinge_loss_grad,
ops::HingeLossGradKernel<phi::CPUContext, float>);

REGISTER_OP_CUDA_KERNEL(hinge_loss,
ops::HingeLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(hinge_loss_grad,
ops::HingeLossGradKernel<phi::GPUContext, float>);

// PHI struct-kernel registrations for hinge_loss forward and backward.
// CPU kernels: float only (same coverage as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(
hinge_loss, CPU, ALL_LAYOUT, ops::HingeLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
hinge_loss_grad, CPU, ALL_LAYOUT, ops::HingeLossGradKernel, float) {}

// GPU registrations are guarded so CPU-only builds do not reference them;
// the guard covers both CUDA and HIP toolchains.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(
hinge_loss, GPU, ALL_LAYOUT, ops::HingeLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
hinge_loss_grad, GPU, ALL_LAYOUT, ops::HingeLossGradKernel, float) {}
#endif
4 changes: 2 additions & 2 deletions paddle/fluid/operators/hinge_loss_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename DeviceContext, typename T, typename AttrType = T>
template <typename T, typename DeviceContext, typename AttrType = T>
class HingeLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand All @@ -38,7 +38,7 @@ class HingeLossKernel : public framework::OpKernel<T> {
}
};

template <typename DeviceContext, typename T, typename AttrType = T>
template <typename T, typename DeviceContext, typename AttrType = T>
class HingeLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand Down
21 changes: 12 additions & 9 deletions paddle/fluid/operators/im2sequence_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,15 @@ REGISTER_OPERATOR(im2sequence,
ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OP_CPU_KERNEL(im2sequence,
ops::Im2SequenceKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(im2sequence_grad,
ops::Im2SequenceGradKernel<phi::CPUContext, float>);

REGISTER_OP_CUDA_KERNEL(im2sequence,
ops::Im2SequenceKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(im2sequence_grad,
ops::Im2SequenceGradKernel<phi::GPUContext, float>);

// PHI struct-kernel registrations for im2sequence forward and backward.
// CPU kernels: float only (same coverage as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(
im2sequence, CPU, ALL_LAYOUT, ops::Im2SequenceKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
im2sequence_grad, CPU, ALL_LAYOUT, ops::Im2SequenceGradKernel, float) {}

// GPU registrations guarded for CUDA/HIP builds only.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(
im2sequence, GPU, ALL_LAYOUT, ops::Im2SequenceKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
im2sequence_grad, GPU, ALL_LAYOUT, ops::Im2SequenceGradKernel, float) {}
#endif
4 changes: 2 additions & 2 deletions paddle/fluid/operators/im2sequence_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ inline int Im2SeqOutputSize(
return output_size;
}

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class Im2SequenceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -153,7 +153,7 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class Im2SequenceGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
18 changes: 10 additions & 8 deletions paddle/fluid/operators/inplace_abn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker<T> {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -270,7 +270,7 @@ class InplaceABNKernel : public framework::OpKernel<T> {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -373,9 +373,11 @@ REGISTER_OPERATOR(inplace_abn,
InplaceAbnOpInplaceInferer)
REGISTER_OPERATOR(inplace_abn_grad, ops::InplaceABNGradOp)

REGISTER_OP_CPU_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::CPUContext, float>,
ops::InplaceABNKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::CPUContext, float>,
ops::InplaceABNGradKernel<phi::CPUContext, double>);
// PHI struct-kernel registrations for inplace_abn forward/backward on CPU,
// covering float and double (same types as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, CPU, ALL_LAYOUT, ops::InplaceABNKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(inplace_abn_grad,
CPU,
ALL_LAYOUT,
ops::InplaceABNGradKernel,
float,
double) {}
26 changes: 14 additions & 12 deletions paddle/fluid/operators/inplace_abn_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -109,7 +109,7 @@ class InplaceABNKernel : public framework::OpKernel<T> {

// Deriving the Gradient for the Backward Pass of Batch Normalization
// https://kevinzakka.github.io/2016/09/14/batch_normalization/
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -221,15 +221,17 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;
#ifdef PADDLE_WITH_HIP
// MIOPEN do not support double
REGISTER_OP_CUDA_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::GPUContext, float>);
// HIP build: MIOpen does not support double (see the comment on the guard
// above), so only the float kernels are registered for forward and backward.
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, GPU, ALL_LAYOUT, ops::InplaceABNKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
inplace_abn_grad, GPU, ALL_LAYOUT, ops::InplaceABNGradKernel, float) {}
#else
REGISTER_OP_CUDA_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::GPUContext, float>,
ops::InplaceABNKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::GPUContext, float>,
ops::InplaceABNGradKernel<phi::GPUContext, double>);
// CUDA build: both float and double kernels are registered, matching the
// old REGISTER_OP_CUDA_KERNEL type coverage.
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, GPU, ALL_LAYOUT, ops::InplaceABNKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(inplace_abn_grad,
GPU,
ALL_LAYOUT,
ops::InplaceABNGradKernel,
float,
double) {}
#endif
11 changes: 7 additions & 4 deletions paddle/fluid/operators/limit_by_capacity_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,13 @@ class LimitByCapacityOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CPU_KERNEL(limit_by_capacity,
ops::LimitByCapacityOpCPUKernel<int>,
ops::LimitByCapacityOpCPUKernel<int64_t>);

REGISTER_OP_WITHOUT_GRADIENT(limit_by_capacity,
ops::LimitByCapacityOp,
ops::LimitByCapacityOpMaker);

// PHI struct-kernel registration for limit_by_capacity on CPU, covering
// int and int64_t (same types as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(limit_by_capacity,
CPU,
ALL_LAYOUT,
ops::LimitByCapacityOpCPUKernel,
int,
int64_t) {}
11 changes: 6 additions & 5 deletions paddle/fluid/operators/limit_by_capacity_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ __global__ void limit_by_capacity_impl(
}
}

template <typename T>
template <typename T, typename DeviceContext>
class LimitByCapacityOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand Down Expand Up @@ -78,7 +78,8 @@ class LimitByCapacityOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(limit_by_capacity,
ops::LimitByCapacityOpCUDAKernel<int64_t>);
// PHI struct-kernel registration for limit_by_capacity on GPU.
// NOTE(review): only int64_t is registered here while the CPU side also
// registers int — this matches the pre-existing REGISTER_OP_CUDA_KERNEL
// coverage, so it is intentional, not an omission.
PD_REGISTER_STRUCT_KERNEL(limit_by_capacity,
GPU,
ALL_LAYOUT,
ops::LimitByCapacityOpCUDAKernel,
int64_t) {}
2 changes: 1 addition & 1 deletion paddle/fluid/operators/limit_by_capacity_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace paddle {
namespace operators {

template <typename T>
template <typename T, typename DeviceContext>
class LimitByCapacityOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
20 changes: 13 additions & 7 deletions paddle/fluid/operators/linear_chain_crf_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,16 @@ REGISTER_OPERATOR(linear_chain_crf,
REGISTER_OPERATOR(linear_chain_crf_grad,
ops::LinearChainCRFGradOp,
ops::LinearChainCRFGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(linear_chain_crf,
ops::LinearChainCRFOpKernel<phi::CPUContext, float>,
ops::LinearChainCRFOpKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(
linear_chain_crf_grad,
ops::LinearChainCRFGradOpKernel<phi::CPUContext, float>,
ops::LinearChainCRFGradOpKernel<phi::CPUContext, double>);

// PHI struct-kernel registrations for linear_chain_crf forward/backward on
// CPU, covering float and double (same as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(linear_chain_crf,
CPU,
ALL_LAYOUT,
ops::LinearChainCRFOpKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(linear_chain_crf_grad,
CPU,
ALL_LAYOUT,
ops::LinearChainCRFGradOpKernel,
float,
double) {}
4 changes: 2 additions & 2 deletions paddle/fluid/operators/linear_chain_crf_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct ScalarMul {

using framework::LoD;

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LinearChainCRFOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -245,7 +245,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
12 changes: 8 additions & 4 deletions paddle/fluid/operators/margin_rank_loss_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,11 @@ REGISTER_OPERATOR(margin_rank_loss,
ops::MarginRankLossGradMaker<paddle::framework::OpDesc>,
ops::MarginRankLossGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(margin_rank_loss_grad, ops::MarginRankLossGradOp);
REGISTER_OP_CPU_KERNEL(margin_rank_loss,
ops::MarginRankLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(margin_rank_loss_grad,
ops::MarginRankLossGradKernel<phi::CPUContext, float>);

// PHI struct-kernel registrations for margin_rank_loss forward/backward on
// CPU, float only (same coverage as the old REGISTER_OP_CPU_KERNEL).
PD_REGISTER_STRUCT_KERNEL(
margin_rank_loss, CPU, ALL_LAYOUT, ops::MarginRankLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(margin_rank_loss_grad,
CPU,
ALL_LAYOUT,
ops::MarginRankLossGradKernel,
float) {}
11 changes: 7 additions & 4 deletions paddle/fluid/operators/margin_rank_loss_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ limitations under the License. */

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(margin_rank_loss,
ops::MarginRankLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(margin_rank_loss_grad,
ops::MarginRankLossGradKernel<phi::GPUContext, float>);
// PHI struct-kernel registrations for margin_rank_loss forward/backward on
// GPU, float only (same coverage as the old REGISTER_OP_CUDA_KERNEL).
PD_REGISTER_STRUCT_KERNEL(
margin_rank_loss, GPU, ALL_LAYOUT, ops::MarginRankLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(margin_rank_loss_grad,
GPU,
ALL_LAYOUT,
ops::MarginRankLossGradKernel,
float) {}
4 changes: 2 additions & 2 deletions paddle/fluid/operators/margin_rank_loss_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct Heaviside {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class MarginRankLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
Expand Down Expand Up @@ -62,7 +62,7 @@ class MarginRankLossKernel : public framework::OpKernel<T> {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class MarginRankLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
Expand Down
12 changes: 8 additions & 4 deletions paddle/fluid/operators/modified_huber_loss_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,11 @@ REGISTER_OPERATOR(
ops::ModifiedHuberLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(modified_huber_loss_grad, ops::ModifiedHuberLossGradOp);

REGISTER_OP_CPU_KERNEL(modified_huber_loss,
ops::ModifiedHuberLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(modified_huber_loss_grad,
ops::ModifiedHuberLossGradCPUKernel<float>);
// PHI struct-kernel registrations for modified_huber_loss on CPU, float
// only. The backward pass uses the CPU-specific ModifiedHuberLossGradCPUKernel
// (the GPU .cu file registers its own grad kernel).
PD_REGISTER_STRUCT_KERNEL(
modified_huber_loss, CPU, ALL_LAYOUT, ops::ModifiedHuberLossKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(modified_huber_loss_grad,
CPU,
ALL_LAYOUT,
ops::ModifiedHuberLossGradCPUKernel,
float) {}
15 changes: 10 additions & 5 deletions paddle/fluid/operators/modified_huber_loss_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct ModifiedHuberLossBackward {
}
};

template <typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossGradGPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand Down Expand Up @@ -74,7 +74,12 @@ class ModifiedHuberLossGradGPUKernel : public framework::OpKernel<T> {
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(modified_huber_loss,
ops::ModifiedHuberLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(modified_huber_loss_grad,
ops::ModifiedHuberLossGradGPUKernel<float>);

// PHI struct-kernel registrations for modified_huber_loss on GPU, float
// only. Forward reuses the shared ModifiedHuberLossKernel from the header;
// backward uses the GPU-specific ModifiedHuberLossGradGPUKernel defined above.
PD_REGISTER_STRUCT_KERNEL(
modified_huber_loss, GPU, ALL_LAYOUT, ops::ModifiedHuberLossKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(modified_huber_loss_grad,
GPU,
ALL_LAYOUT,
ops::ModifiedHuberLossGradGPUKernel,
float) {}
4 changes: 2 additions & 2 deletions paddle/fluid/operators/modified_huber_loss_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct ModifiedHuberLossForward {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand All @@ -79,7 +79,7 @@ class ModifiedHuberLossKernel : public framework::OpKernel<T> {
};

// CPU backward kernel
template <typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossGradCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand Down