
Commit e12543a

update
1 parent ade7108 commit e12543a

7 files changed: +40 −36 lines changed

paddle/fluid/pybind/slice_utils.h

Lines changed: 17 additions & 1 deletion
@@ -493,6 +493,20 @@ static paddle::Tensor dealWithAdvancedIndex(
   return transed_tensor;
 }
 
+inline std::vector<int64_t> ComputeIndexStrides(const paddle::Tensor& input,
+                                                const size_t index_dims_size) {
+  const auto& input_strides = input.strides();
+  size_t element_size_bytes = phi::SizeOf(input.dtype());
+  std::vector<int64_t> strides(index_dims_size, 0);
+  const size_t min_size =
+      std::min(static_cast<size_t>(input_strides.size()), index_dims_size);
+  for (size_t i = 0; i < min_size; ++i) {
+    strides[i] = input_strides[i] * element_size_bytes;
+  }
+
+  return strides;
+}
+
 static paddle::Tensor getValueForBoolTensor(const paddle::Tensor& tensor,
                                             const paddle::Tensor& bool_index) {
   PADDLE_ENFORCE(bool_index.shape().size() <= tensor.shape().size(),

@@ -540,8 +554,10 @@ static paddle::Tensor getValueForBoolTensor(const paddle::Tensor& tensor,
     indices.emplace_back(sliced_tensor);
   }
   auto index_dims_vec = common::vectorize<int64_t>(bool_index.dims());
+  auto index_stride = ComputeIndexStrides(tensor, index_dims_vec.size());
 
-  return index_elementwise_ad_func(tensor, indices, index_dims_vec);
+  return index_elementwise_ad_func(
+      tensor, indices, index_dims_vec, index_stride);
 #else
 
   return gather_nd_ad_func(tensor, bool_2_idx);
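
Note: the new ComputeIndexStrides helper converts the input tensor's element strides into byte strides (element stride multiplied by the element size from phi::SizeOf), padding with zeros for any index dimensions beyond the input's rank. Below is a minimal standalone sketch of that computation using plain std::vector instead of paddle::Tensor; the helper name, the example shape, and the dtype size are illustrative assumptions, not part of this commit.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the logic of ComputeIndexStrides: convert element strides into
// byte strides, padding with 0 for dimensions beyond the input's rank.
// Hypothetical standalone helper for illustration only.
std::vector<int64_t> ComputeIndexStridesSketch(
    const std::vector<int64_t>& input_strides,  // element strides of the input
    size_t element_size_bytes,                  // e.g. 4 for float32 (assumed)
    size_t index_dims_size) {
  std::vector<int64_t> strides(index_dims_size, 0);
  const size_t min_size = std::min(input_strides.size(), index_dims_size);
  for (size_t i = 0; i < min_size; ++i) {
    strides[i] = input_strides[i] * static_cast<int64_t>(element_size_bytes);
  }
  return strides;
}

int main() {
  // A contiguous 2x3 float32 tensor has element strides {3, 1};
  // the resulting byte strides are {12, 4}.
  for (int64_t s : ComputeIndexStridesSketch({3, 1}, 4, 2)) {
    std::cout << s << " ";  // prints: 12 4
  }
  std::cout << "\n";
  return 0;
}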

paddle/phi/infermeta/binary.cc

Lines changed: 1 addition & 1 deletion
@@ -2149,9 +2149,9 @@ void GatherNdInferMeta(const MetaTensor& x,
 void IndexElementwiseInferMeta(const MetaTensor& x,
                                const std::vector<const MetaTensor*>& index,
                                const std::vector<int64_t>& index_dims,
+                               const std::vector<int64_t>& index_stride,
                                MetaTensor* out) {
   const auto& x_dims = x.dims();
-  // auto index_dims = index.dims();
 
   PADDLE_ENFORCE_LE(
       index_dims.size(),

paddle/phi/infermeta/binary.h

Lines changed: 1 addition & 0 deletions
@@ -404,6 +404,7 @@ void GatherNdInferMeta(const MetaTensor& x,
 void IndexElementwiseInferMeta(const MetaTensor& x,
                                const std::vector<const MetaTensor*>& index,
                                const std::vector<int64_t>& index_dims,
+                               const std::vector<int64_t>& index_stride,
                                MetaTensor* out);
 
 void GatherTreeMeta(const MetaTensor& ids,

paddle/phi/kernels/funcs/index_elementwise.cu.h

Lines changed: 13 additions & 29 deletions
@@ -1,4 +1,4 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

@@ -38,7 +38,6 @@ constexpr int MAX_DIMS = 25;
 #endif
 
 static constexpr int launch_bound2 = 4;
-
 static constexpr int launch_size_nd = 128;
 
 template <int nt, int vt, typename func_t>

@@ -92,8 +91,9 @@ struct OffsetCalculator {
                    const int64_t* const* strides,
                    const int64_t* element_sizes = nullptr)
       : dims(dims) {
-    PADDLE_ENFORCE(
-        dims <= MAX_DIMS, "tensor has too many (>", MAX_DIMS, ") dims");
+    PADDLE_ENFORCE(dims <= MAX_DIMS,
+                   "The number of dimensions (%d) exceeds MAX_DIMS.",
+                   dims);
     for (int i = 0; i < dims; i++) {
       sizes_[i] = IntDivider<index_t>(sizes[i]);
       for (int arg = 0; arg < NARGS; arg++) {

@@ -131,41 +131,23 @@ struct OffsetCalculator {
   stride_t strides_[MAX_DIMS][std::max<int>(NARGS, 1)];
 };
 
-template <typename T>
-std::array<int64_t, DDim::kMaxRank> ComputeStrides(
-    const phi::DenseTensor& input, const size_t index_dims_size) {
-  const auto& input_strides = input.strides();
-  const size_t element_size_bytes = sizeof(T);
-
-  std::array<int64_t, DDim::kMaxRank> strides{};
-
-  for (int i = 0; i < index_dims_size; ++i) {
-    if (i < input_strides.size()) {
-      strides[i] = input_strides[i] * element_size_bytes;
-    } else {
-      strides[i] = 0;
-    }
-  }
-
-  return strides;
-}
-
 template <typename IndexT>
 std::array<char*, DDim::kMaxRank> GetIndexDataPtrs(
     const std::vector<const DenseTensor*> index) {
   std::array<char*, DDim::kMaxRank> index_ptrs{};
 
   PADDLE_ENFORCE_LE(index.size(),
                     DDim::kMaxRank,
-                    "The number of index tensors exceeds the maximum rank.");
+                    "The rank of the index tensor must be less than or "
+                    "equal to DDim::kMaxRank.");
 
   for (size_t i = 0; i < index.size(); ++i) {
     const IndexT* p_index = index[i]->data<IndexT>();
 
     PADDLE_ENFORCE(p_index != nullptr,
-                   "The pointer p_index is nullptr, "
-                   "please check whether the index tensor is valid and "
-                   "its data is correctly initialized.");
+                   "The pointer p_index must not be nullptr. "
+                   "Please ensure the index tensor is valid and its data "
+                   "is correctly initialized.");
 
     index_ptrs[i] = reinterpret_cast<char*>(const_cast<IndexT*>(p_index));
   }

@@ -234,10 +216,10 @@ void IndexElementwiseKernel(const phi::GPUContext& ctx,
                             const DenseTensor& input,
                             const std::vector<const DenseTensor*> index,
                             const std::vector<int64_t>& index_dims,
+                            const std::vector<int64_t>& index_stride,
                             DenseTensor* output) {
   auto num_indices = index_dims.size();
 
-  auto index_stride = ComputeStrides<T>(input, num_indices);
   auto index_ptrs = GetIndexDataPtrs<IndexT>(index);
 
   auto sizes = std::array<int64_t, DDim::kMaxRank>{};
@@ -252,7 +234,9 @@ void IndexElementwiseKernel(const phi::GPUContext& ctx,
 
   const int64_t N = output->numel();
   PADDLE_ENFORCE(N >= 0 && N <= std::numeric_limits<int32_t>::max(),
-                 "N >= 0 && N <= std::numeric_limits<int32_t>::max()");
+
+                 "Output numel must be in the range [0, "
+                 "std::numeric_limits<int32_t>::max()].");
   constexpr int nt = launch_size_nd;
   constexpr int vt = launch_bound2;
   const dim3 block(nt);
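
For context on why index_stride is expressed in bytes: GetIndexDataPtrs hands the kernel raw char* pointers, and the per-dimension byte strides are what turn an index value into a byte offset into the input buffer. Below is a rough CPU-side sketch of that addressing scheme; it is a conceptual illustration only, not the actual CUDA kernel, and every name in it is made up.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Conceptual CPU reference: gather elements from `input` (viewed as raw
// bytes) using one int64 index array per indexed dimension and the
// per-dimension byte strides. Not the GPU implementation in this commit.
template <typename T>
std::vector<T> GatherByByteStride(
    const std::vector<T>& input,
    const std::vector<std::vector<int64_t>>& indices,  // one array per dim
    const std::vector<int64_t>& byte_strides) {         // one stride per dim
  const char* base = reinterpret_cast<const char*>(input.data());
  const size_t n = indices.empty() ? 0 : indices[0].size();
  std::vector<T> out(n);
  for (size_t i = 0; i < n; ++i) {
    int64_t offset = 0;
    for (size_t d = 0; d < indices.size(); ++d) {
      offset += indices[d][i] * byte_strides[d];  // byte offset per dimension
    }
    std::memcpy(&out[i], base + offset, sizeof(T));
  }
  return out;
}

int main() {
  // 2x3 row-major float tensor; byte strides {12, 4} as in the earlier sketch.
  std::vector<float> x = {0, 1, 2, 3, 4, 5};
  auto y = GatherByByteStride<float>(x, {{0, 1}, {2, 0}}, {12, 4});
  std::cout << y[0] << " " << y[1] << "\n";  // prints: 2 3 (x[0][2], x[1][0])
  return 0;
}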

paddle/phi/kernels/gpu/index_elementwise_kernel.cu

Lines changed: 5 additions & 3 deletions
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

@@ -25,6 +25,7 @@ void IndexElementwiseKernel(const Context& ctx,
                             const DenseTensor& x,
                             const std::vector<const DenseTensor*>& index,
                             const std::vector<int64_t>& index_dims,
+                            const std::vector<int64_t>& index_stride,
                             DenseTensor* out) {
   const auto& index_type = index[0]->dtype();
   PADDLE_ENFORCE_EQ(

@@ -48,10 +49,11 @@ void IndexElementwiseKernel(const Context& ctx,
   ctx.template Alloc<T>(out);
 
   if (index_type == phi::DataType::INT32) {
-    phi::funcs::IndexElementwiseKernel<T, int>(ctx, x, index, index_dims, out);
+    phi::funcs::IndexElementwiseKernel<T, int>(
+        ctx, x, index, index_dims, index_stride, out);
   } else if (index_type == phi::DataType::INT64) {
     phi::funcs::IndexElementwiseKernel<T, int64_t>(
-        ctx, x, index, index_dims, out);
+        ctx, x, index, index_dims, index_stride, out);
   }
 }

paddle/phi/kernels/index_elementwise_kernel.h

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

@@ -24,6 +24,7 @@ void IndexElementwiseKernel(const Context &ctx,
                             const DenseTensor &x,
                             const std::vector<const DenseTensor *> &index,
                             const std::vector<int64_t> &index_dims,
+                            const std::vector<int64_t> &index_stride,
                             DenseTensor *out);
 
 }  // namespace phi

paddle/phi/ops/yaml/ops.yaml

Lines changed: 1 addition & 1 deletion
@@ -2754,7 +2754,7 @@
   interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface
 
 - op : index_elementwise
-  args : (Tensor x, Tensor[] index, int64_t[] index_dims)
+  args : (Tensor x, Tensor[] index, int64_t[] index_dims, int64_t[] index_stride)
   output : Tensor (out)
   infer_meta :
     func : IndexElementwiseInferMeta
