|
17 | 17 |
|
18 | 18 | namespace custom_kernel {
|
19 | 19 |
|
// Forward declaration of the dtype-cast kernel defined elsewhere in this
// plugin. It converts `x` to `dtype` and writes the result into `out`
// (whose meta must already be set by the caller). Declared here so kernels
// in this translation unit can down-cast FLOAT64 tensors to FLOAT32 before
// invoking ACL ops.
template <typename T, typename Context>
void CastKernel(const Context& dev_ctx,
                const phi::DenseTensor& x,
                phi::DataType dtype,
                phi::DenseTensor* out);
20 | 26 | template <typename T, typename Context>
|
21 | 27 | void AclopSoftmaxKernel(const Context& dev_ctx,
|
22 | 28 | const phi::DenseTensor& x,
|
@@ -141,7 +147,32 @@ void SoftmaxGradKernel(const Context& dev_ctx,
// NOTE(review): this hunk shows only the tail of SoftmaxGradKernel; the
// signature and the earlier body (including the fallback call visible on
// the first context line below) are outside this view.
|
141 | 147 | dev_ctx, out, out_grad, axis, x_grad)));
|
142 | 148 | dev_ctx.template Alloc<T>(x_grad);
|
143 | 149 | int64_t dim = static_cast<int64_t>(axis);
|
144 |
| - EXEC_NPU_CMD(aclnnSoftmaxBackward, dev_ctx, out_grad, out, dim, *x_grad); |
| 150 | + |
// Down-cast the upstream gradient to FLOAT32 when it arrives as FLOAT64 —
// presumably because aclnnSoftmaxBackward does not accept double inputs;
// verify against the CANN op support list. Only the dtype field of the
// copied meta is changed, so shape/layout are preserved.
| 151 | + phi::DenseTensor cast_x; |
| 152 | + if (out_grad.dtype() == phi::DataType::FLOAT64) { |
| 153 | + phi::DenseTensorMeta meta(out_grad.meta()); |
| 154 | + meta.dtype = phi::DataType::FLOAT32; |
| 155 | + cast_x.set_meta(meta); |
| 156 | + |
| 157 | + custom_kernel::CastKernel<T, Context>( |
| 158 | + dev_ctx, out_grad, phi::DataType::FLOAT32, &cast_x); |
| 159 | + } else { |
// Non-FLOAT64 path: plain DenseTensor assignment — presumably a shallow
// copy sharing the original allocation, so no extra device memory is used.
| 160 | + cast_x = out_grad; |
| 161 | + } |
| 162 | + |
// Same FLOAT64 -> FLOAT32 treatment for the forward output `out`.
| 163 | + phi::DenseTensor cast_y; |
| 164 | + if (out.dtype() == phi::DataType::FLOAT64) { |
| 165 | + phi::DenseTensorMeta meta(out.meta()); |
| 166 | + meta.dtype = phi::DataType::FLOAT32; |
| 167 | + cast_y.set_meta(meta); |
| 168 | + |
| 169 | + custom_kernel::CastKernel<T, Context>( |
| 170 | + dev_ctx, out, phi::DataType::FLOAT32, &cast_y); |
| 171 | + } else { |
| 172 | + cast_y = out; |
| 173 | + } |
| 174 | + |
// NOTE(review): x_grad was allocated above with dtype T (line 148). When
// T == double, the op receives FLOAT32 inputs but a FLOAT64 output tensor,
// and no cast-back of the result is visible in this hunk — confirm the
// runtime performs that implicit output conversion, otherwise the result
// should be computed into a FLOAT32 temporary and cast back into x_grad.
| 175 | + EXEC_NPU_CMD(aclnnSoftmaxBackward, dev_ctx, cast_x, cast_y, dim, *x_grad); |
145 | 176 | }
|
146 | 177 |
|
147 | 178 | } // namespace custom_kernel
|
|
0 commit comments