Commit b733fa6

add dynamic support for max_grad op

Parent: 1bde725

File tree

4 files changed (+225 / -34 lines):
  paddle/fluid/primitive/rule/vjp/details.h
  paddle/fluid/primitive/utils/utils.h
  python/paddle/autograd/backward_utils.py
  test/prim/pir_prim/test_prim_sub_graph_backward_dynamic_shape.py

paddle/fluid/primitive/rule/vjp/details.h

Lines changed: 80 additions & 32 deletions

@@ -1560,43 +1560,91 @@ void max_grad(const Tensor& x,
   if (!x_grad) {
     return;
   }
-  auto zero_tensor = full<T>(common::vectorize(x.dims()), 0.0, x.dtype());
-  std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
-  int64_t axis_size = axis.size();
-  int64_t x_dim_size = x_dim.size();
-  reduce_all = false;
-  if (reduce_all || axis_size == 0 || axis_size == x_dim_size) {
-    reduce_all = true;
-  } else {
+
+  Tensor x_grad_tmp;
+  if (has_dynamic_shape(x.shape())) {
+    const Tensor x_shape = shape<T>(x);
+    const Tensor zero_tensor =
+        backend::full_with_tensor<T>(x_shape, 0.0, x.dtype());
+    const size_t axis_size = axis.size();
+    const size_t x_dim_size = x.dims().size();
+
     reduce_all = false;
-  }
-  auto x_grad_tmp = Tensor();
-  if (x_dim_size == 0 || x_dim_size == 1 || keepdim) {
-    auto out_grad_tmp = out_grad.expand(IntArray(x_dim));
-    auto out_tmp = out.expand(IntArray(x_dim));
-    auto mask = equal<T>(x, out_tmp);
-    x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
-  } else {
-    auto axis_ = std::vector<int64_t>();
-    if (reduce_all) {
-      for (int64_t i = 0; i < x_dim_size; i++) {
-        axis_.push_back(i);
+    if (reduce_all || axis_size == 0 || axis_size == x_dim_size) {
+      reduce_all = true;
+    } else {
+      reduce_all = false;
+    }
+
+    if (x_dim_size == 0 || x_dim_size == 1 || keepdim) {
+      auto out_grad_tmp = backend::expand<T>(out_grad, x_shape);
+      auto out_tmp = backend::expand<T>(out, x_shape);
+      auto mask = equal<T>(x, out_tmp);
+      x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
+    } else {
+      const Tensor out_grad_shape = shape<T>(out_grad);
+      auto axis_ = std::vector<int64_t>();
+
+      if (reduce_all) {
+        for (int64_t i = 0; i < x_dim_size; i++) {
+          axis_.push_back(i);
+        }
+      } else {
+        axis_ = axis.GetData();
+        for (int64_t i = 0; i < axis_size; i++) {
+          if (axis[i] < 0) {
+            axis_[i] = axis[i] + x_dim_size;
+          }
+        }
       }
+      const Tensor out_grad_shape_extend =
+          get_unsqueeze_dims<T>(out_grad_shape, axis_);
+      auto out_grad_ = backend::reshape<T>(out_grad, out_grad_shape_extend);
+      auto out_ = backend::reshape<T>(out, out_grad_shape_extend);
+      auto out_grad_tmp = backend::expand<T>(out_grad_, x_shape);
+      auto out_tmp = backend::expand<T>(out_, x_shape);
+      auto mask = equal<T>(x, out_tmp);
+      x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
+    }
+  } else {
+    auto zero_tensor = full<T>(common::vectorize(x.dims()), 0.0, x.dtype());
+    std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
+    int64_t axis_size = axis.size();
+    int64_t x_dim_size = x_dim.size();
+    reduce_all = false;
+    if (reduce_all || axis_size == 0 || axis_size == x_dim_size) {
+      reduce_all = true;
     } else {
-      axis_ = axis.GetData();
-      for (int64_t i = 0; i < axis_size; i++) {
-        if (axis[i] < 0) {
-          axis_[i] = axis[i] + x_dim_size;
+      reduce_all = false;
+    }
+
+    if (x_dim_size == 0 || x_dim_size == 1 || keepdim) {
+      auto out_grad_tmp = out_grad.expand(IntArray(x_dim));
+      auto out_tmp = out.expand(IntArray(x_dim));
+      auto mask = equal<T>(x, out_tmp);
+      x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
+    } else {
+      auto axis_ = std::vector<int64_t>();
+      if (reduce_all) {
+        for (int64_t i = 0; i < x_dim_size; i++) {
+          axis_.push_back(i);
+        }
+      } else {
+        axis_ = axis.GetData();
+        for (int64_t i = 0; i < axis_size; i++) {
+          if (axis[i] < 0) {
+            axis_[i] = axis[i] + x_dim_size;
+          }
         }
       }
+      auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_);
+      auto out_grad_ = reshape<T>(out_grad, out_grad_shape);
+      auto out_ = reshape<T>(out, out_grad_shape);
+      auto out_grad_tmp = out_grad_.expand(IntArray(x_dim));
+      auto out_tmp = out_.expand(IntArray(x_dim));
+      auto mask = equal<T>(x, out_tmp);
+      x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
     }
-    auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_);
-    auto out_grad_ = reshape<T>(out_grad, out_grad_shape);
-    auto out_ = reshape<T>(out, out_grad_shape);
-    auto out_grad_tmp = out_grad_.expand(IntArray(x_dim));
-    auto out_tmp = out_.expand(IntArray(x_dim));
-    auto mask = equal<T>(x, out_tmp);
-    x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
   }
   set_output<T>(x_grad_tmp, x_grad);
 }
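In both the dynamic and the static branch of the hunk above, the gradient rule is the same: broadcast the reduced `out` and `out_grad` back to the shape of `x`, mark the positions where `x` equals the maximum, and route the upstream gradient only to those positions. A minimal dygraph sketch of the same computation with Paddle's public Python API (the tensor values are illustrative, not part of the commit):

import paddle

x = paddle.to_tensor([[1.0, 3.0, 3.0],
                      [2.0, 0.0, 5.0]])
out = paddle.max(x, axis=1, keepdim=True)   # shape [2, 1]
out_grad = paddle.ones_like(out)            # upstream gradient

# Broadcast the reduced result back to x's shape and build the mask.
mask = paddle.equal(x, out)                 # True wherever x attains the row max
zero = paddle.zeros_like(x)
x_grad = paddle.where(mask, paddle.expand(out_grad, x.shape), zero)
print(x_grad)  # gradient lands on every element equal to the row maximum

Using keepdim=True sidesteps the reshape/unsqueeze step; the non-keepdim branch in the C++ rule first re-inserts the reduced axes (via get_unsqueeze_dims) before expanding.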
@@ -2292,7 +2340,7 @@ void swiglu_grad(const Tensor& x,
                  Tensor* dx,
                  Tensor* dy) {
   const auto& x_shape = x.shape();
-  auto one_tensor = full<T>(x_shape, 1.0, x.dtype());
+  auto one_tensor = full_scalar<T>(1.0, x.dtype());
   Tensor x_grad;
   if (y) {
     const auto& y_tensor = y.get();
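The swiglu_grad hunk above replaces a ones tensor materialized at x's full shape with a scalar constant; presumably full_scalar produces a 0-D tensor that simply broadcasts in the elementwise arithmetic that follows, so the rule no longer depends on a concrete x_shape. A small dygraph sketch of the broadcasting this relies on (values and shapes are illustrative assumptions):

import paddle

x = paddle.rand([4, 8])
one_full = paddle.full(x.shape, 1.0, dtype=x.dtype)  # old style: needs a static shape
one_scalar = paddle.full([], 1.0, dtype=x.dtype)     # 0-D constant, broadcasts

sig = paddle.nn.functional.sigmoid(x)
# Both forms give the same elementwise result.
print(paddle.allclose(one_full - sig, one_scalar - sig).item())  # True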

paddle/fluid/primitive/utils/utils.h

Lines changed: 36 additions & 2 deletions

@@ -77,7 +77,7 @@ static std::vector<int64_t> get_expand_dims(const Tensor& origin,
   return result;
 }

-// This fucction compute unsqueeze dims for reshape to replace unsqueeze.
+// This function compute unsqueeze dims for reshape to replace unsqueeze.
 static std::vector<int64_t> get_unsqueeze_dims(
     const Tensor& origin, const std::vector<int64_t>& axis) {
   auto origin_dims = origin.shape();

@@ -103,7 +103,41 @@ static std::vector<int64_t> get_unsqueeze_dims(
   return result;
 }

-// This fucction compute unsqueeze dims for reshape to replace unsqueeze.
+// This function compute `dynamic` unsqueeze dims for reshape to replace
+// unsqueeze. And should used only on `dynamic`.
+template <typename T>
+Tensor get_unsqueeze_dims(const Tensor& origin_shape,
+                          const std::vector<int64_t>& axis) {
+  auto total_shape_size = origin_shape.numel() + axis.size();
+  const Tensor one = full<T>({1}, 1, origin_shape.dtype());
+
+  std::vector<Tensor> result(total_shape_size, one);
+  // to support axis not in increasing order.
+  std::vector<bool> is_set(total_shape_size, false);
+
+  for (size_t i = 0; i < axis.size(); ++i) {
+    PADDLE_ENFORCE_LT(
+        axis[i],
+        total_shape_size,
+        common::errors::OutOfRange("Your index [%lu] exceeds the number of "
+                                   "elements in origin_dims[%lu].",
+                                   axis[i],
+                                   total_shape_size));
+    is_set[axis[i]] = true;
+  }
+
+  size_t j = 0;
+  for (size_t i = 0; i < total_shape_size; ++i) {
+    if (is_set[i]) {
+      continue;
+    }
+    result[i] = get_slice<T>(origin_shape, int64_t(j));
+    is_set[i] = true;
+    ++j;
+  }
+  return concat<T>(result);
+}
+
 static std::vector<int64_t> get_squeeze_dims(const Tensor& origin,
                                              const std::vector<int64_t>& axis) {
   auto origin_dims = origin.shape();
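The new templated overload builds the unsqueezed shape as a 1-D shape tensor rather than a std::vector, so it stays usable when concrete dimensions are only known at runtime: slots listed in axis are filled with a constant 1, the remaining slots take the entries of origin_shape in order, and the pieces are concatenated into one shape tensor. A rough Python equivalent of the index bookkeeping (plain lists stand in for the shape tensor; names are mine, not Paddle's):

def unsqueeze_dims(origin_shape, axis):
    """Insert 1s into origin_shape at the positions listed in axis."""
    total = len(origin_shape) + len(axis)
    result = [1] * total
    is_set = [False] * total
    for a in axis:                  # axis may be unsorted, mirroring the C++ code
        assert a < total, "axis index out of range"
        is_set[a] = True
    j = 0
    for i in range(total):
        if is_set[i]:
            continue                # this slot keeps the inserted 1
        result[i] = origin_shape[j]
        j += 1
    return result

print(unsqueeze_dims([30, 40], [0, 2]))  # [1, 30, 1, 40]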

python/paddle/autograd/backward_utils.py

Lines changed: 1 addition & 0 deletions

@@ -65,6 +65,7 @@
     "pd_op.gelu",
     "pd_op.hardswish",
     "pd_op.reduce_as",
+    "pd_op.max",
 ]
test/prim/pir_prim/test_prim_sub_graph_backward_dynamic_shape.py

Lines changed: 108 additions & 0 deletions

@@ -175,6 +175,30 @@ def reduce_as_net(x, y):
     return paddle.reduce_as(x, y)


+def max_net1(x):
+    return paddle.max(x, keepdim=True)
+
+
+def max_net2(x):
+    return paddle.max(x, keepdim=False)
+
+
+def max_net3(x):
+    return paddle.max(x, axis=[0, 1], keepdim=False)
+
+
+def max_net4(x):
+    return paddle.max(x, axis=[-1, -2], keepdim=False)
+
+
+def max_net5(x):
+    return paddle.max(x, axis=[-1, 0], keepdim=False)
+
+
+def max_net6(x):
+    return paddle.max(x)
+
+
 def apply_to_static(net, use_cinn, input_spec=None):
     build_strategy = paddle.static.BuildStrategy()
     build_strategy.build_cinn_pass = use_cinn
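Each max_netK covers one axis/keepdim combination; the test harness compiles them with input specs whose dimensions are None so the decomposed backward runs under symbolic shapes. A standalone sketch of that wiring (illustrative only, the real harness is apply_to_static and TestPrimBaseWithGrad in this file):

import numpy as np
import paddle
from paddle.static import InputSpec

def max_net3(x):
    return paddle.max(x, axis=[0, 1], keepdim=False)

x = paddle.to_tensor(np.random.random([30, 200, 40]).astype("float32"))
x.stop_gradient = False

# None dims mark every axis as dynamic when the net is compiled to a static graph.
spec = InputSpec(shape=[None, None, None], dtype="float32")
static_net = paddle.jit.to_static(max_net3, input_spec=[spec])

out = static_net(x)
grad = paddle.grad(out.sum(), x)[0]
print(grad.shape)  # [30, 200, 40]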
@@ -1966,5 +1990,89 @@ def setUp(self):
         self.y_without_grad = True


+class TestPrimMaxWithGrad1(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net1
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad2(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30]
+        self.init_x_shape = [None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net1
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad3(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net2
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad4(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net3
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad5(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net4
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad6(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net5
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
+class TestPrimMaxWithGrad7(TestPrimBaseWithGrad):
+    def setUp(self):
+        np.random.seed(2024)
+        self.dtype = "float32"
+        self.x_shape = [30, 200, 40]
+        self.init_x_shape = [None, None, None]
+        self.x = np.random.random(self.x_shape).astype(self.dtype)
+        self.net = max_net6
+        self.enable_cinn = False
+        self.tol = 1e-6
+
+
 if __name__ == "__main__":
     unittest.main()
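The TestPrimMaxWithGrad cases compare the decomposed backward against the composite op; the behaviour they exercise is simply that upstream gradient flows to every element equal to the reduced maximum. A small NumPy cross-check of that expectation, assuming a ones upstream gradient (illustrative, not part of the test file):

import numpy as np
import paddle

np.random.seed(2024)
x_np = np.random.random([30, 200, 40]).astype("float32")

# Reference: mask of positions that attain the max along the reduced axes.
ref = (x_np == x_np.max(axis=(0, 1), keepdims=True)).astype("float32")

x = paddle.to_tensor(x_np, stop_gradient=False)
out = paddle.max(x, axis=[0, 1])
(grad,) = paddle.grad(out.sum(), x)

np.testing.assert_allclose(grad.numpy(), ref, rtol=1e-6)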
