
Commit 1980035: fix unstack big tensor
1 parent b8455e0

4 files changed (+16 -13 lines)

paddle/phi/infermeta/backward.cc
Lines changed: 2 additions & 2 deletions

```diff
@@ -1747,8 +1747,8 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
                         rank));
   if (axis < 0) axis += (rank + 1);
 
-  auto vec = common::vectorize<int>(input_dims[0]);
-  vec.insert(vec.begin() + axis, static_cast<int>(input_dims.size()));
+  auto vec = common::vectorize<int64_t>(input_dims[0]);
+  vec.insert(vec.begin() + axis, static_cast<int64_t>(input_dims.size()));
   x_grad->set_dims(common::make_ddim(vec));
   x_grad->set_dtype(out_grad[0]->dtype());
 }
```
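The change above matters because `common::vectorize<int>` narrows every dimension size to 32-bit `int`, so any dimension larger than 2^31 - 1 silently overflows when a big tensor's gradient shape is inferred. The following standalone sketch (with a hypothetical `vectorize` stand-in, not Paddle's actual helper) illustrates the failure mode and why widening the template argument to `int64_t` fixes it:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for common::vectorize<T>: copies a tensor's
// dimension sizes (stored as int64_t) into a std::vector<T>.
template <typename T>
std::vector<T> vectorize(const std::vector<int64_t>& dims) {
  std::vector<T> out;
  for (int64_t d : dims) {
    out.push_back(static_cast<T>(d));  // narrows when T = int
  }
  return out;
}

int main() {
  // A "big tensor" dimension that exceeds INT_MAX (2^31 - 1).
  std::vector<int64_t> dims = {3'000'000'000, 4};

  auto as_int = vectorize<int>(dims);      // old behavior: overflows
  auto as_i64 = vectorize<int64_t>(dims);  // fixed behavior: preserved

  std::cout << as_int[0] << "\n";  // implementation-defined garbage,
                                   // e.g. -1294967296
  std::cout << as_i64[0] << "\n";  // 3000000000
}
```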

paddle/phi/infermeta/unary.cc
Lines changed: 1 addition & 1 deletion

```diff
@@ -5962,7 +5962,7 @@ void UnStackInferMeta(const MetaTensor& x,
                           x_dim[axis],
                           num));
   }
-  auto vec = common::vectorize<int>(x_dim);
+  auto vec = common::vectorize<int64_t>(x_dim);
   vec.erase(vec.begin() + axis);
   for (size_t i = 0; i < output_count; i++) {
     outs[i]->set_dims(common::make_ddim(vec));
```
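`UnStackInferMeta` applies the same widening to the forward shape rule: each of the `x_dim[axis]` outputs gets `x_dim` with the unstacked axis erased. A minimal standalone sketch of that rule (the helper name here is hypothetical, not the Paddle API):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Sketch of the unstack shape rule: every output tensor has the input's
// shape with the unstacked axis removed. With int64_t elements, dimension
// sizes beyond INT_MAX survive the computation intact.
std::vector<int64_t> UnstackOutShape(std::vector<int64_t> x_dim, int axis) {
  if (axis < 0) axis += static_cast<int>(x_dim.size());
  x_dim.erase(x_dim.begin() + axis);
  return x_dim;
}

int main() {
  std::vector<int64_t> x_dim = {8, 5'000'000'000, 7};
  auto out = UnstackOutShape(x_dim, 0);  // each output: {5'000'000'000, 7}
  assert(out[0] == 5'000'000'000 && out[1] == 7);
}
```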

paddle/phi/kernels/funcs/stack_and_unstack.h
Lines changed: 7 additions & 4 deletions

```diff
@@ -210,7 +210,7 @@ void LaunchUnStackKernel(const Context& ctx,
   constexpr int kWarpSize = 32;
   constexpr int kMaxOut = 16;
 
-  int tid_x = 0, tid_y = 0, bid_x = 0, bid_y = 1;
+  int64_t tid_x = 0, tid_y = 0, bid_x = 0, bid_y = 1;
   if (split_dim < kMaxOut) {
     tid_y = split_dim;
     tid_x =
@@ -219,10 +219,13 @@ void LaunchUnStackKernel(const Context& ctx,
   } else {
     tid_y = kMaxOut;
     tid_x = kWarpSize;
-    bid_y = backends::gpu::DivUp<int>(split_dim, kMaxOut);
+    bid_y = backends::gpu::DivUp<int64_t>(split_dim, kMaxOut);
   }
-  int tile_x_num = backends::gpu::DivUp<int>(out_row, tid_x);
-  bid_x = std::min(tile_x_num, backends::gpu::kMultiDimslimit);
+  int64_t tile_x_num = backends::gpu::DivUp<int64_t>(out_row, tid_x);
+  if (tile_x_num < static_cast<int64_t>(backends::gpu::kMultiDimslimit))
+    bid_x = tile_x_num;
+  else
+    bid_x = backends::gpu::kMultiDimslimit;
   dim3 blocks(tid_x, tid_y, 1);
   dim3 grids(bid_x, bid_y, 1);
 
```
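The GPU launch-configuration fix is slightly more involved: once `tile_x_num` becomes `int64_t`, the old `std::min(tile_x_num, backends::gpu::kMultiDimslimit)` no longer compiles, because `std::min` requires both operands to have the same type and the limit constant is a plain `int`. The patch therefore spells the clamp out by hand. A minimal sketch of the same logic, with a hypothetical `DivUp` and an assumed placeholder value for `kMultiDimslimit` (the real constant lives in Paddle's GPU backend):

```cpp
#include <cstdint>

// Hypothetical ceiling-division helper mirroring backends::gpu::DivUp<T>.
template <typename T>
T DivUp(T a, T b) {
  return (a + b - 1) / b;
}

// Assumed placeholder value; what matters is that the real constant is an
// int, which is what breaks std::min against an int64_t operand.
constexpr int kMultiDimslimit = 65536;

int64_t ComputeBidX(int64_t out_row, int64_t tid_x) {
  int64_t tile_x_num = DivUp<int64_t>(out_row, tid_x);
  // std::min(tile_x_num, kMultiDimslimit) would fail to deduce a common
  // type (int64_t vs int), so the clamp uses an explicit comparison and
  // cast instead.
  if (tile_x_num < static_cast<int64_t>(kMultiDimslimit)) {
    return tile_x_num;
  }
  return kMultiDimslimit;
}
```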

paddle/phi/kernels/impl/unstack_grad_kernel_impl.h
Lines changed: 6 additions & 6 deletions

```diff
@@ -28,13 +28,13 @@ void UnStackGradKernel(const Context &dev_ctx,
                        DenseTensor *x_grad) {
   if (axis < 0) axis += (x[0]->dims().size() + 1);
 
-  int n = static_cast<int>(x.size());
+  int64_t n = static_cast<int64_t>(x.size());
   auto *x_grad_data = dev_ctx.template Alloc<T>(x_grad);
   std::vector<const T *> x_datas(n);
-  for (int i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();
+  for (int64_t i = 0; i < n; i++) x_datas[i] = x[i]->data<T>();
 
-  int pre = 1;
-  int post = 1;
+  int64_t pre = 1;
+  int64_t post = 1;
   auto &dim = x[0]->dims();
   for (auto i = 0; i < axis; ++i) pre *= dim[i];
   for (auto i = axis; i < dim.size(); ++i) post *= dim[i];
@@ -56,8 +56,8 @@ void UnStackGradKernel(const Context &dev_ctx,
 
   size_t x_offset = 0;
   size_t y_offset = 0;
-  for (int i = 0; i < pre; i++) {
-    for (int j = 0; j < n; j++) {
+  for (int64_t i = 0; i < pre; i++) {
+    for (int64_t j = 0; j < n; j++) {
       std::memcpy(
           x_grad_data + y_offset, x_data_arr[j] + x_offset, post * sizeof(T));
       y_offset += post;
```
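On the CPU path the overflow risk is in the index arithmetic: `pre` (the product of dims before the axis), `post` (the product of dims from the axis on), and the loop counters multiply up to the total element count, which can exceed `INT_MAX` for big tensors. A standalone sketch of the copy pattern with the widened types; it assumes, since the advance falls outside the visible hunk, that `x_offset` grows by `post` after each inner loop:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Sketch of the UnStackGradKernel copy pattern: n unstacked slices are
// interleaved back into x_grad. For each of the pre outer blocks, one
// contiguous run of post elements is copied from every slice in turn.
// With 32-bit counters, pre or post alone can overflow on big tensors,
// hence the move to int64_t.
template <typename T>
void UnstackGradCopy(const std::vector<const T *> &x_datas,
                     T *x_grad_data,
                     int64_t pre,
                     int64_t post) {
  const int64_t n = static_cast<int64_t>(x_datas.size());
  size_t x_offset = 0;
  size_t y_offset = 0;
  for (int64_t i = 0; i < pre; i++) {
    for (int64_t j = 0; j < n; j++) {
      std::memcpy(x_grad_data + y_offset, x_datas[j] + x_offset,
                  post * sizeof(T));
      y_offset += post;
    }
    x_offset += post;  // assumed advance, per the pattern above
  }
}
```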
