PaddlePaddle · luotao1 · May 22, 2025 · May 14, 2025 · May 14, 2025 · May 14, 2025
diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc
@@ -926,6 +926,10 @@ static void parse_attr(PyObject *obj,
     auto attr =
         CastPyArg2Float(obj, infer_spmd_string, static_cast<ssize_t>(arg_pos));
     ctx->EmplaceBackAttr(attr);
+  } else if (PyObject_CheckDataType(obj)) {
+    auto attr = CastPyArg2DataType(
+        obj, infer_spmd_string, static_cast<ssize_t>(arg_pos));
+    ctx->EmplaceBackAttr(attr);
   } else {  // TODO(ljz) support other types
     PADDLE_THROW(common::errors::InvalidArgument(
         "%s(): argument (position %d) must be "

diff --git a/paddle/fluid/pybind/op_function_common.h b/paddle/fluid/pybind/op_function_common.h
@@ -44,6 +44,8 @@ bool PyObject_CheckBool(PyObject** obj);
 
 bool PyObject_CheckLong(PyObject* obj);
 
+bool PyObject_CheckDataType(PyObject* obj);
+
 int32_t PyObject_ToInt32(PyObject* obj);
 
 uint32_t PyObject_ToUInt32(PyObject* obj);

diff --git a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.cc b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.cc
@@ -66,6 +66,7 @@ AttrType InferSpmdContext::AttrAt(size_t idx) const {
 template float InferSpmdContext::AttrAt(size_t idx) const;
 template int InferSpmdContext::AttrAt(size_t idx) const;
 template int64_t InferSpmdContext::AttrAt(size_t idx) const;
+template DataType InferSpmdContext::AttrAt(size_t idx) const;
 
 template <>
 bool InferSpmdContext::AttrAt(size_t idx) const {

diff --git a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h
@@ -178,6 +178,7 @@ struct InferSpmdFnImpl<Return (*)(Args...), infer_spmd_fn> {
   PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(int);
   PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(float);
   PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(int64_t);
+  PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_ATTRIBUTE(DataType);
   PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_CONST_ATTRIBUTE_REF(std::vector<int>);
   PD_SPECIALIZE_InferSpmdFnCallHelper_FOR_CONST_ATTRIBUTE_REF(
       std::vector<int64_t>);

@@ -0,0 +1,34 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/infermeta/spmd_rules/cummax.h"
+#include "paddle/phi/infermeta/spmd_rules/topk.h"
+
+namespace phi {
+namespace distributed {
+
+SpmdInfo CummaxInferSpmd(const DistMetaTensor& x, int axis, DataType dtype) {
+  return TopkInferSpmdBase(x, axis);  // reuses the top-k rule; dtype unused
+}
+
+SpmdInfo CummaxGradInferSpmd(const DistMetaTensor& x,
+                             const DistMetaTensor& indices,
+                             const DistMetaTensor& out_grad,
+                             int axis,
+                             DataType dtype) {  // dtype is not forwarded
+  return TopkGradInferSpmdBase(x, indices, out_grad, axis);
+}
+
+}  // namespace distributed
+}  // namespace phi
@@ -0,0 +1,32 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
+#include "paddle/phi/core/distributed/type_defs.h"
+
+namespace phi {
+namespace distributed {
+
+SpmdInfo CummaxInferSpmd(const DistMetaTensor& x, int axis, DataType dtype);
+
+SpmdInfo CummaxGradInferSpmd(const DistMetaTensor& x,
+                             const DistMetaTensor& indices,
+                             const DistMetaTensor& out_grad,
+                             int axis,
+                             DataType dtype);
+
+}  // namespace distributed
+}  // namespace phi
@@ -0,0 +1,34 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/infermeta/spmd_rules/cummin.h"
+#include "paddle/phi/infermeta/spmd_rules/topk.h"
+
+namespace phi {
+namespace distributed {
+
+SpmdInfo CumminInferSpmd(const DistMetaTensor& x, int axis, DataType dtype) {
+  return TopkInferSpmdBase(x, axis);  // reuses the top-k rule; dtype unused
+}
+
+SpmdInfo CumminGradInferSpmd(const DistMetaTensor& x,
+                             const DistMetaTensor& indices,
+                             const DistMetaTensor& out_grad,
+                             int axis,
+                             DataType dtype) {  // dtype is not forwarded
+  return TopkGradInferSpmdBase(x, indices, out_grad, axis);
+}
+
+}  // namespace distributed
+}  // namespace phi
@@ -0,0 +1,32 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
+#include "paddle/phi/core/distributed/type_defs.h"
+
+namespace phi {
+namespace distributed {
+
+SpmdInfo CumminInferSpmd(const DistMetaTensor& x, int axis, DataType dtype);
+
+SpmdInfo CumminGradInferSpmd(const DistMetaTensor& x,
+                             const DistMetaTensor& indices,
+                             const DistMetaTensor& out_grad,
+                             int axis,
+                             DataType dtype);
+
+}  // namespace distributed
+}  // namespace phi
@@ -753,6 +753,15 @@ PD_REGISTER_SPMD_RULE(nonzero,
 // add_n
 PD_REGISTER_SPMD_RULE(add_n, PD_INFER_SPMD(phi::distributed::AddNInferSpmd));
 
+// cummax: registers CummaxInferSpmd and CummaxGradInferSpmd
+PD_REGISTER_SPMD_RULE(cummax,
+                      PD_INFER_SPMD(phi::distributed::CummaxInferSpmd),
+                      PD_INFER_SPMD(phi::distributed::CummaxGradInferSpmd));
+// cummin: registers CumminInferSpmd and CumminGradInferSpmd
+PD_REGISTER_SPMD_RULE(cummin,
+                      PD_INFER_SPMD(phi::distributed::CumminInferSpmd),
+                      PD_INFER_SPMD(phi::distributed::CumminGradInferSpmd));
+
 // argsort
 PD_REGISTER_SPMD_RULE(argsort,
                       PD_INFER_SPMD(phi::distributed::ArgSortInferSpmd),

@@ -28,6 +28,8 @@ limitations under the License. */
 #include "paddle/phi/infermeta/spmd_rules/concat.h"
 #include "paddle/phi/infermeta/spmd_rules/conv2d.h"
 #include "paddle/phi/infermeta/spmd_rules/cross_entropy_with_softmax.h"
+#include "paddle/phi/infermeta/spmd_rules/cummax.h"
+#include "paddle/phi/infermeta/spmd_rules/cummin.h"
 #include "paddle/phi/infermeta/spmd_rules/cumsum.h"
 #include "paddle/phi/infermeta/spmd_rules/default_data_parallel.h"
 #include "paddle/phi/infermeta/spmd_rules/dropout.h"

@@ -20,8 +20,7 @@ limitations under the License. */
 namespace phi {
 namespace distributed {
 
-SpmdInfo TopkInferSpmd(
-    const DistMetaTensor& x, int k, int axis, bool largest, bool sorted) {
+SpmdInfo TopkInferSpmdBase(const DistMetaTensor& x, int axis) {
   // Verify input args
   EXTRACT_SHAPE_AND_DIST_ATTR(x);
   axis = axis < 0 ? x_ndim + axis : axis;
@@ -60,13 +59,10 @@ SpmdInfo TopkInferSpmd(
   return {{x_dist_attr_dst}, {out_dist_attr_dst, indices_dist_attr_dst}};
 }
 
-SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
-                           const DistMetaTensor& indices,
-                           const DistMetaTensor& out_grad,
-                           int k,
-                           int axis,
-                           bool largest,
-                           bool sorted) {
+SpmdInfo TopkGradInferSpmdBase(const DistMetaTensor& x,
+                               const DistMetaTensor& indices,
+                               const DistMetaTensor& out_grad,
+                               int axis) {
   // Verify input args
   EXTRACT_SHAPE_AND_DIST_ATTR(x);
   EXTRACT_SHAPE_AND_DIST_ATTR(indices);
@@ -95,24 +91,28 @@ SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
           axis));
   // Build einsum notation
   std::string alphabet = "abcdefghijlopqrstuvwxyz";
-  std::string x_axes = alphabet.substr(0, x_ndim - 1);
+  std::string x_axes = alphabet.substr(0, x_ndim);
   std::string indices_axes = x_axes;
   std::string out_grad_axes = x_axes;
+  std::vector<int64_t> x_dims_mapping(x_dims_mapping_src);
+  std::vector<int64_t> indices_dims_mapping(indices_dims_mapping_src);
+  std::vector<int64_t> out_grad_dims_mapping(out_grad_dims_mapping_src);
+  x_dims_mapping[axis] = -1;
+  indices_dims_mapping[axis] = -1;
+  out_grad_dims_mapping[axis] = -1;
 
   // Merge sharding
   std::pair<std::string, std::vector<int64_t>> indices_pair(
-      indices_axes, indices_dims_mapping_src);
+      indices_axes, indices_dims_mapping);
   std::pair<std::string, std::vector<int64_t>> out_grad_pair(
-      out_grad_axes, out_grad_dims_mapping_src);
-  std::pair<std::string, std::vector<int64_t>> x_pair(x_axes,
-                                                      x_dims_mapping_src);
+      out_grad_axes, out_grad_dims_mapping);
+  std::pair<std::string, std::vector<int64_t>> x_pair(x_axes, x_dims_mapping);
   auto axis_to_dim_map =
       ShardingMergeForTensors({x_pair, indices_pair, out_grad_pair});
 
   // Infer dims mapping
   std::vector<int64_t> x_grad_dims_mapping_dst =
       GetDimsMappingForAxes(x_axes, axis_to_dim_map);
-  x_grad_dims_mapping_dst.insert(x_grad_dims_mapping_dst.begin() + axis, -1);
   std::vector<int64_t> x_dims_mapping_dst = x_grad_dims_mapping_dst;
   std::vector<int64_t> indices_dims_mapping_dst = x_grad_dims_mapping_dst;
   std::vector<int64_t> out_grad_dims_mapping_dst = x_grad_dims_mapping_dst;
@@ -141,6 +141,22 @@ SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
   return {{x_dist_attr_dst, indices_dist_attr_dst, out_grad_dist_attr_dst},
           {x_grad_dist_attr_dst}};
 }
+
+SpmdInfo TopkInferSpmd(
+    const DistMetaTensor& x, int k, int axis, bool largest, bool sorted) {
+  return TopkInferSpmdBase(x, axis);  // k, largest, sorted are not forwarded
+}
+
+SpmdInfo TopkGradInferSpmd(const DistMetaTensor& x,
+                           const DistMetaTensor& indices,
+                           const DistMetaTensor& out_grad,
+                           int k,
+                           int axis,
+                           bool largest,
+                           bool sorted) {  // k/largest/sorted: not forwarded
+  return TopkGradInferSpmdBase(x, indices, out_grad, axis);
+}
+
 SpmdInfo TopkInferSpmdDynamic(const DistMetaTensor& x,
                               const Scalar& k,
                               int axis,

@@ -20,7 +20,11 @@ limitations under the License. */
 
 namespace phi {
 namespace distributed {
-
+SpmdInfo TopkInferSpmdBase(const DistMetaTensor& x, int axis);
+SpmdInfo TopkGradInferSpmdBase(const DistMetaTensor& x,
+                               const DistMetaTensor& indices,
+                               const DistMetaTensor& out_grad,
+                               int axis);
 SpmdInfo TopkInferSpmd(
     const DistMetaTensor& x, int k, int axis, bool largest, bool sorted);
 

diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml
@@ -698,6 +698,7 @@
   infer_meta :
     func : UnchangedInferMeta
     param: [x]
+    spmd_rule : CummaxGradInferSpmd
   kernel :
     func : cummax_grad
     data_type : out_grad
@@ -709,6 +710,7 @@
   infer_meta :
     func : UnchangedInferMeta
     param: [x]
+    spmd_rule : CumminGradInferSpmd
   kernel :
     func : cummin_grad
     data_type : out_grad
@@ -1979,6 +1981,7 @@
   infer_meta :
     func : UnchangedInferMeta
     param: [out]
+    spmd_rule : SoftmaxGradInferSpmd
   kernel :
     func : log_softmax_grad
     data_type : out_grad

diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml
@@ -1276,6 +1276,7 @@
   output : Tensor(out), Tensor(indices)
   infer_meta :
     func : CumWithIndicesInferMeta
+    spmd_rule : CummaxInferSpmd
   kernel :
     func : cummax
     data_type : x
@@ -1287,6 +1288,7 @@
   output : Tensor(out), Tensor(indices)
   infer_meta :
     func : CumWithIndicesInferMeta
+    spmd_rule : CumminInferSpmd
   kernel :
     func : cummin
     data_type : x
@@ -3114,6 +3116,7 @@
   output : Tensor(out)
   infer_meta :
     func : UnchangedInferMetaCheckAxis
+    spmd_rule : SoftmaxInferSpmd
   kernel :
     func : log_softmax
     data_type : x

diff --git a/test/auto_parallel/spmd_rules/CMakeLists.txt b/test/auto_parallel/spmd_rules/CMakeLists.txt
@@ -48,6 +48,8 @@ if(WITH_DISTRIBUTE)
     py_test_modules(test_add_n_rule MODULES test_add_n_rule)
     py_test_modules(test_mean_all_rule MODULES test_mean_all_rule)
     py_test_modules(test_argmin_rule MODULES test_argmin_rule)
+    py_test_modules(test_cummax_rule MODULES test_cummax_rule)
+    py_test_modules(test_cummin_rule MODULES test_cummin_rule)
     py_test_modules(test_argsort_rule MODULES test_argsort_rule)
   endif()
   # End of unittests WITH single card WITHOUT timeout