PaddlePaddle · luotao1 · Aug 7, 2024 · Jul 30, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.h b/paddle/fluid/operators/collective/c_sync_calc_stream_op.h
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.kps b/paddle/fluid/operators/collective/c_sync_calc_stream_op.kps
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -39,7 +39,6 @@ const std::unordered_set<std::string> LegacyOpList = {
     LoadCombineOp::name(),
     CConcatOp::name(),
     CBroadcast_Op::name(),
-    CSyncCalcStream_Op::name(),
     CSyncCommStream_Op::name(),
     DistributedPushSparseOp::name(),
     SendV2Op::name(),

diff --git a/paddle/phi/kernels/gpu/sync_calc_stream_kernel.cu b/paddle/phi/kernels/gpu/sync_calc_stream_kernel.cu
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/impl/sync_calc_stream_kernel_impl.h"
+
+PD_REGISTER_KERNEL(sync_calc_stream,  // same name as `kernel.func` in ops.yaml
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SyncCalcStreamKernel,  // template defined in the impl header
+                   float,  // dtype list starts here; the kernel body never uses T
+                   double,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}  // empty registration body
diff --git a/paddle/phi/kernels/impl/sync_calc_stream_kernel_impl.h b/paddle/phi/kernels/impl/sync_calc_stream_kernel_impl.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // syncs dev_ctx; T is unused in the body
+void SyncCalcStreamKernel(const Context &dev_ctx,  // context that gets synced
+                          const DenseTensor &x,  // not read in the body
+                          DenseTensor *out) {  // not written in the body
+#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && !defined(_WIN32)
+  phi::backends::gpu::GpuStreamSync(dev_ctx.stream());  // CUDA/HIP, non-Windows
+#elif defined(PADDLE_WITH_XPU_BKCL)
+  auto place = dev_ctx.GetPlace();
+  PADDLE_ENFORCE_EQ(place.GetType() == phi::AllocationType::XPU,  // XPU only
+                    true,
+                    phi::errors::PreconditionNotMet(
+                        "Sync stream op can run on xpu place only for now."));
+  dev_ctx.Wait();  // waits on the whole context rather than one stream handle
+#else  // NOTE(review): also hit by CUDA/HIP on _WIN32 and by XPU without BKCL
+  PADDLE_THROW(
+      phi::errors::PreconditionNotMet("PaddlePaddle should compile with GPU."));
+#endif
+}
+}  // namespace phi
diff --git a/paddle/phi/kernels/kps/sync_calc_stream_kernel.kps b/paddle/phi/kernels/kps/sync_calc_stream_kernel.kps
@@ -0,0 +1,38 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef PADDLE_WITH_XPU_KP
+
+// Please do not modify the following code
+#if defined(__CUDA_ARCH__)
+#undef __CUDA_ARCH__
+#endif
+
+#if defined(__CUDACC__)
+#undef __CUDACC__
+#endif
+
+#if defined(__CUDA__)
+#undef __CUDA__
+#endif
+
+#if defined(__NVCC__)
+#undef __NVCC__
+#endif
+
+#include "paddle/phi/kernels/impl/sync_calc_stream_kernel_impl.h"
+
+PD_REGISTER_KERNEL(  // NOTE(review): float only; GPU/XPU register six dtypes
+    sync_calc_stream, KPS, ALL_LAYOUT, phi::SyncCalcStreamKernel, float) {}
+#endif  // PADDLE_WITH_XPU_KP
diff --git a/paddle/phi/kernels/xpu/sync_calc_stream_kernel.cc b/paddle/phi/kernels/xpu/sync_calc_stream_kernel.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/impl/sync_calc_stream_kernel_impl.h"
+
+PD_REGISTER_KERNEL(sync_calc_stream,  // same name as `kernel.func` in ops.yaml
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::SyncCalcStreamKernel,  // template defined in the impl header
+                   float,  // same six dtypes as the GPU registration
+                   double,
+                   int,
+                   int64_t,
+                   phi::dtype::bfloat16,
+                   phi::dtype::float16) {}  // empty registration body
diff --git a/paddle/phi/ops/yaml/op_compat.yaml b/paddle/phi/ops/yaml/op_compat.yaml
@@ -4082,12 +4082,6 @@
   outputs :
     out: Out
 
-- op: c_sync_calc_stream
-  inputs :
-    x : X
-  outputs :
-    out : Out
-
 - op: c_sync_comm_stream
   inputs :
     x : X
@@ -4611,6 +4605,12 @@
   outputs:
     out : Out
 
+- op: sync_calc_stream(c_sync_calc_stream)  # old c_sync_calc_stream name kept in parentheses
+  inputs :
+    x : X  # same x/X pairing as the removed c_sync_calc_stream entry
+  outputs :
+    out : Out  # same out/Out pairing as the removed c_sync_calc_stream entry
+
 - op: temporal_shift
   backward: temporal_shift_grad
   inputs :

diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml
@@ -4482,6 +4482,16 @@
   inplace : (mean -> mean_out), (variance -> variance_out)
   optional : reserve_space
 
+- op : sync_calc_stream
+  args : (Tensor x)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta  # presumably out's meta mirrors x -- TODO confirm
+    param : [x]
+  kernel :
+    func : sync_calc_stream  # name used by the GPU, KPS and XPU PD_REGISTER_KERNEL calls
+  inplace : (x -> out)  # the kernel body itself touches neither x nor out
+
 - op : take_along_axis
   args : (Tensor arr, Tensor indices, int axis)
   output : Tensor