Skip to content

Commit 7942f46

Browse files
authored
[AutoParallel] Add expand spmd (#71603)
1 parent 0a8a0be commit 7942f46

File tree

7 files changed

+179
-0
lines changed

7 files changed

+179
-0
lines changed
+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
http://www.apache.org/licenses/LICENSE-2.0
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License. */
11+
12+
#include "paddle/phi/infermeta/spmd_rules/expand.h"
13+
14+
#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
15+
#include "paddle/phi/infermeta/spmd_rules/utils.h"
16+
17+
namespace phi::distributed {
18+
19+
SpmdInfo ExpandInferSpmd(const DistMetaTensor& x, const IntArray& shape) {
  // Infer the SPMD (sharding) attributes of expand's output.
  // Rules:
  //   * Axes whose target size differs from x's size are truly broadcast and
  //     cannot remain sharded -> mapping -1 (replicated).
  //   * An entry of -1 in `shape` keeps x's size, so the axis inherits x's
  //     dims mapping.
  //   * Newly inserted leading axes have no counterpart in x -> -1.
  // The input's dist attr is returned unchanged (no reshard required).
  EXTRACT_SHAPE_AND_DIST_ATTR(x);
  const std::vector<int64_t>& expand_shape = shape.GetData();
  std::vector<int64_t> out_dims_mapping(expand_shape.size());
  // Number of new leading dimensions prepended by this expand. Cast both
  // operands explicitly: subtracting size_t values and narrowing the result
  // into int relies on unsigned wraparound semantics if ranks ever disagree.
  const int x_rank = static_cast<int>(x_shape.size());
  const int out_rank = static_cast<int>(expand_shape.size());
  const int diff = out_rank - x_rank;
  for (int i = out_rank - 1; i >= diff; --i) {
    if (expand_shape[i] != -1 && expand_shape[i] != x_shape[i - diff]) {
      // Axis is broadcast to a new size: must be replicated.
      out_dims_mapping[i] = -1;
    } else {
      // Size unchanged (or -1 placeholder): sharding carries over from x.
      out_dims_mapping[i] = x_dims_mapping_src[i - diff];
    }
  }
  // Leading axes introduced by the expand are replicated.
  for (int i = 0; i < diff; ++i) {
    out_dims_mapping[i] = -1;
  }
  TensorDistAttr out_dist_attr = CopyTensorDistAttrForOutput(x_dist_attr_src);
  out_dist_attr.set_dims_mapping(out_dims_mapping);
  return {{x_dist_attr_src}, {out_dist_attr}};
}
38+
39+
SpmdInfo ExpandGradInferSpmd(const DistMetaTensor& x,
                             const DistMetaTensor& out_grad,
                             const IntArray& shape) {
  // Infer the SPMD (sharding) attributes of expand_grad. x_grad always has
  // the same shape (and rank) as x.
  EXTRACT_SHAPE_AND_DIST_ATTR(x);
  EXTRACT_SHAPE_AND_DIST_ATTR(out_grad);
  if (x_shape.size() == out_grad_shape.size()) {
    // Same rank: no leading axes were inserted, so x_grad simply follows
    // x's placement.
    return {{x_dist_attr_src, out_grad_dist_attr_src}, {x_dist_attr_src}};
  }
  // Number of leading axes present in out_grad but absent from x; the
  // gradient is reduced over them. DDim::size() is signed, so the abs() is
  // purely defensive (out_grad's rank can never be smaller than x's here).
  const size_t axis = static_cast<size_t>(
      std::abs(static_cast<int>(out_grad.dims().size() - x.dims().size())));
  std::vector<int64_t> x_grad_dims_mapping;
  x_grad_dims_mapping.reserve(out_grad_dims_mapping_src.size() - axis);
  for (size_t i = axis; i < out_grad_dims_mapping_src.size(); ++i) {
    if (out_grad.dims()[i] != x.dims()[i - axis]) {
      // Broadcast axis (x's size-1 axis was expanded): the gradient is
      // summed along it, so the corresponding x_grad axis is replicated.
      // Previously this axis was skipped entirely, which produced a
      // dims_mapping shorter than x's rank — an invalid dist attr.
      x_grad_dims_mapping.push_back(-1);
    } else {
      // Size matches: x_grad's axis inherits out_grad's sharding.
      x_grad_dims_mapping.push_back(out_grad_dims_mapping_src[i]);
    }
  }
  TensorDistAttr x_grad_dist_attr =
      CopyTensorDistAttrForOutput(x_dist_attr_src);
  x_grad_dist_attr.set_dims_mapping(x_grad_dims_mapping);
  return {{x_dist_attr_src, out_grad_dist_attr_src}, {x_grad_dist_attr}};
}
62+
63+
} // namespace phi::distributed
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "paddle/phi/common/int_array.h"
18+
#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
19+
#include "paddle/phi/core/distributed/type_defs.h"
20+
21+
namespace phi {
namespace distributed {

// Infers the sharding (dims mapping) of expand's output from the input
// tensor's dist attr and the target `shape`. Axes whose size actually
// changes, and newly inserted leading axes, are replicated (-1); axes whose
// size is kept (including `-1` entries in `shape`) inherit x's mapping.
SpmdInfo ExpandInferSpmd(const DistMetaTensor& x, const IntArray& shape);

// Infers the sharding of expand's gradient. x_grad has the same shape as x;
// when out_grad has extra leading axes they are reduced away and the
// remaining axes derive their mapping from out_grad.
SpmdInfo ExpandGradInferSpmd(const DistMetaTensor& x,
                             const DistMetaTensor& out_grad,
                             const IntArray& shape);

}  // namespace distributed
}  // namespace phi

paddle/phi/infermeta/spmd_rules/rules.h

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ limitations under the License. */
3131
#include "paddle/phi/infermeta/spmd_rules/dropout.h"
3232
#include "paddle/phi/infermeta/spmd_rules/elementwise.h"
3333
#include "paddle/phi/infermeta/spmd_rules/embedding.h"
34+
#include "paddle/phi/infermeta/spmd_rules/expand.h"
3435
#include "paddle/phi/infermeta/spmd_rules/expand_as.h"
3536
#include "paddle/phi/infermeta/spmd_rules/flash_attention.h"
3637
#include "paddle/phi/infermeta/spmd_rules/flatten.h"

paddle/phi/ops/yaml/backward.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,7 @@
997997
infer_meta :
998998
func : UnchangedInferMeta
999999
param : [x]
1000+
spmd_rule : ExpandGradInferSpmd
10001001
kernel :
10011002
func : expand_grad
10021003
data_type : out_grad

paddle/phi/ops/yaml/ops.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,7 @@
16911691
infer_meta :
16921692
func : ExpandInferMeta
16931693
local_shape: out
1694+
spmd_rule : ExpandInferSpmd
16941695
kernel :
16951696
func : expand
16961697
data_type : x

test/cpp/auto_parallel/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ if(WITH_DISTRIBUTE)
3434
cross_entropy_softmax_spmd_rule_test SRCS
3535
cross_entropy_softmax_spmd_rule_test.cc DEPS spmd_rule_test_util phi)
3636

37+
paddle_test(expand_spmd_rule_test SRCS expand_spmd_rule_test.cc DEPS
38+
spmd_rule_test_util phi)
39+
3740
paddle_test(expand_as_spmd_rule_test SRCS expand_as_spmd_rule_test.cc DEPS
3841
spmd_rule_test_util phi)
3942

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "glog/logging.h"
16+
#include "test/cpp/auto_parallel/spmd_rule_test_util.h"
17+
namespace paddle {
18+
namespace distributed {
19+
namespace auto_parallel {
20+
21+
// Builds the 2x3 process mesh (axes "x" and "y", ranks 0..5) shared by all
// cases in this test.
ProcessMesh CreateProcessMesh() {
  const std::vector<int64_t> shape{2, 3};
  const std::vector<int64_t> ranks{0, 1, 2, 3, 4, 5};
  const std::vector<std::string> axis_names{"x", "y"};
  return ProcessMesh(shape, ranks, axis_names);
}
27+
28+
// Convenience helper: wraps a tensor shape and dims mapping into a
// DistMetaTensor bound to the given process mesh.
phi::distributed::DistMetaTensor CreateDistMetaTensor(
    const std::vector<int64_t>& shape,
    const std::vector<int64_t>& dims_mapping,
    const ProcessMesh& process_mesh) {
  TensorDistAttr attr;
  attr.set_process_mesh(process_mesh);
  attr.set_dims_mapping(dims_mapping);
  return phi::distributed::DistMetaTensor(phi::make_ddim(shape), attr);
}
37+
38+
TEST(ExpandInferSpmd, Ctor) {
  ProcessMesh process_mesh = CreateProcessMesh();

  // Test case forward 1: Expand with shape {8, 2, 6, 1024, -1}
  // Only axis 2 is truly broadcast (1 -> 6); it is already replicated (-1),
  // so input and output mappings coincide. -1 in the target shape keeps the
  // input size, so sharded axes 0 and 3 carry over.
  auto x = CreateDistMetaTensor(
      {8, 2, 1, 1024, 128}, {0, -1, -1, 1, -1}, process_mesh);
  phi::IntArray shape = {8, 2, 6, 1024, -1};
  auto spmdinfo = ExpandInferSpmd(x, shape);
  EXPECT_EQ(get_dims_mapping(spmdinfo.first[0]),
            std::vector<int64_t>({0, -1, -1, 1, -1}));
  EXPECT_EQ(get_dims_mapping(spmdinfo.second[0]),
            std::vector<int64_t>({0, -1, -1, 1, -1}));

  // Test case forward 2: Expand with shape {2, -1}
  // A new leading axis is inserted -> replicated (-1); the existing axis
  // keeps its sharding on mesh dim 1.
  auto x1 = CreateDistMetaTensor({8}, {1}, process_mesh);
  phi::IntArray shape1 = {2, -1};
  auto spmdinfo1 = ExpandInferSpmd(x1, shape1);
  EXPECT_EQ(get_dims_mapping(spmdinfo1.first[0]), std::vector<int64_t>({1}));
  EXPECT_EQ(get_dims_mapping(spmdinfo1.second[0]),
            std::vector<int64_t>({-1, 1}));

  // Test case forward 3: Expand with shape {0, -1}
  // Same layout expectation as case 2: the new leading axis is replicated
  // regardless of the size value used for it.
  auto x2 = CreateDistMetaTensor({8}, {1}, process_mesh);
  phi::IntArray shape2 = {0, -1};
  auto spmdinfo2 = ExpandInferSpmd(x2, shape2);
  EXPECT_EQ(get_dims_mapping(spmdinfo2.first[0]), std::vector<int64_t>({1}));
  EXPECT_EQ(get_dims_mapping(spmdinfo2.second[0]),
            std::vector<int64_t>({-1, 1}));

  // Test case backward 1: ExpandGrad with shape {0, -1}
  // out_grad has one extra leading axis; it is dropped and x_grad's single
  // axis inherits out_grad's mapping (1), matching x's own placement.
  auto x3 = CreateDistMetaTensor({8}, {1}, process_mesh);
  auto out3 = CreateDistMetaTensor({2, 8}, {-1, 1}, process_mesh);
  phi::IntArray shape3 = {0, -1};
  auto spmdinfo3 = ExpandGradInferSpmd(x3, out3, shape3);
  EXPECT_EQ(get_dims_mapping(spmdinfo3.first[0]), std::vector<int64_t>({1}));
  EXPECT_EQ(get_dims_mapping(spmdinfo3.first[1]),
            std::vector<int64_t>({-1, 1}));
  EXPECT_EQ(get_dims_mapping(spmdinfo3.second[0]), std::vector<int64_t>({1}));
}
77+
78+
} // namespace auto_parallel
79+
} // namespace distributed
80+
} // namespace paddle

0 commit comments

Comments
 (0)