Commit c17e9d7

[XPU] fix index's datatype, using int64 instead of int, part 2 (g-n)
1 parent 6f80ea0 commit c17e9d7

90 files changed (+576, -616 lines)
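
The theme running through these diffs: a 32-bit int cannot represent element counts or offsets for tensors with more than 2^31 - 1 elements, so shape vectors, loop counters, and index arguments move to int64_t. A minimal standalone sketch of the failure mode (illustrative only, not code from this commit):

    #include <cstdint>
    #include <iostream>

    int main() {
      // A plausible large activation shape: 32 x 256 x 1024 x 1024.
      int64_t n = 32, c = 256, h = 1024, w = 1024;

      int64_t numel = n * c * h * w;  // 8589934592 == 2^33, exact in 64 bits

      // Storing the element count (or an offset derived from it) in a
      // 32-bit int silently drops the high bits; here the count collapses
      // to 0 on typical two's-complement targets.
      int32_t narrowed = static_cast<int32_t>(numel);

      std::cout << numel << " -> " << narrowed << "\n";  // 8589934592 -> 0
      return 0;
    }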


paddle/phi/kernels/funcs/unfold_functor.h

Lines changed: 5 additions & 8 deletions

@@ -18,14 +18,11 @@ namespace phi {
 namespace funcs {
 
 //////// CalcOutputSize Functor ///////
-inline int CalcOutputSize(int input_size,
-                          int filter_size,
-                          int dilation,
-                          int padding1,
-                          int padding2,
-                          int stride) {
-  const int dkernel = dilation * (filter_size - 1) + 1;
-  int output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
+template <typename T = int>
+inline T CalcOutputSize(
+    T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
+  const T dkernel = dilation * (filter_size - 1) + 1;
+  T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
   return input_size == -1 ? -1 : output_size;
 }
 
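
The default template argument keeps existing call sites (which pass plain ints and get T = int) compiling unchanged, while XPU code can now instantiate the same helper with 64-bit extents. A quick standalone check of both instantiations (illustrative, not part of the commit):

    #include <cassert>
    #include <cstdint>

    template <typename T = int>
    inline T CalcOutputSize(
        T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
      const T dkernel = dilation * (filter_size - 1) + 1;
      T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
      return input_size == -1 ? -1 : output_size;
    }

    int main() {
      // All-int arguments deduce T = int, matching the old signature.
      assert(CalcOutputSize(224, 3, 1, 1, 1, 1) == 224);
      // Explicit int64_t instantiation survives extents beyond INT32_MAX.
      int64_t big = int64_t{1} << 33;
      assert(CalcOutputSize<int64_t>(big, 3, 1, 1, 1, 1) == big);
      return 0;
    }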

paddle/phi/kernels/impl/unfold_grad_kernel_impl.h

Lines changed: 2 additions & 2 deletions

@@ -39,13 +39,13 @@ void UnfoldGradKernel(const Context& ctx,
   const auto& x_dims = x_grad->dims();
   const int batch_size = static_cast<int>(x_dims[0]);
 
-  int out_height = phi::funcs::CalcOutputSize(x_dims[2],
+  int out_height = phi::funcs::CalcOutputSize(static_cast<int>(x_dims[2]),
                                               kernel_sizes[0],
                                               dilations[0],
                                               paddings[0],
                                               paddings[2],
                                               strides[0]);
-  int out_width = phi::funcs::CalcOutputSize(x_dims[3],
+  int out_width = phi::funcs::CalcOutputSize(static_cast<int>(x_dims[3]),
                                              kernel_sizes[1],
                                              dilations[1],
                                              paddings[1],
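
The static_cast<int> is what keeps these call sites compiling against the now-templated helper: x_dims[i] is an int64_t while kernel_sizes, dilations, paddings, and strides hold ints, and mixing the two gives the compiler conflicting deductions for T. A minimal reproduction of the error (hypothetical simplified signature):

    #include <cstdint>

    template <typename T = int>
    T CalcOutputSize(T input_size, T filter_size) {
      return input_size - filter_size + 1;  // body simplified for illustration
    }

    int main() {
      int64_t dim = 224;  // stands in for x_dims[2]
      int kernel = 3;     // stands in for kernel_sizes[0]
      // CalcOutputSize(dim, kernel);  // error: T deduced as both int64_t and int
      int out = CalcOutputSize(static_cast<int>(dim), kernel);  // ok: T = int
      return out == 222 ? 0 : 1;
    }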

paddle/phi/kernels/impl/unfold_kernel_impl.h

Lines changed: 2 additions & 2 deletions

@@ -37,13 +37,13 @@ void UnfoldKernel(const Context& ctx,
   phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::kCFO, Context, T> im2col;
   const auto& x_dims = x.dims();
 
-  int out_height = phi::funcs::CalcOutputSize(x_dims[2],
+  int out_height = phi::funcs::CalcOutputSize(static_cast<int>(x_dims[2]),
                                               kernel_sizes[0],
                                               dilations[0],
                                               paddings[0],
                                               paddings[2],
                                               strides[0]);
-  int out_width = phi::funcs::CalcOutputSize(x_dims[3],
+  int out_width = phi::funcs::CalcOutputSize(static_cast<int>(x_dims[3]),
                                              kernel_sizes[1],
                                              dilations[1],
                                              paddings[1],

paddle/phi/kernels/xpu/beam_search_decode_kernel.cc

Lines changed: 2 additions & 2 deletions

@@ -86,15 +86,15 @@ void BeamSearchDecodeXPUKernel(const Context& dev_ctx,
       *sentenceIds, sentenceIds_temp, 1, ids->at(0).place());
   PADDLE_ENFORCE_EQ(
       r,
-      xpu::Error_t::SUCCESS,
+      0,
       common::errors::External(
           "Execute function CopyTensorByXPU failed by [%d]", r));
 
   r = phi::funcs::CopyTensorByType(
       *sentenceScores, sentenceScores_temp, 1, ids->at(0).place());
   PADDLE_ENFORCE_EQ(
       r,
-      xpu::Error_t::SUCCESS,
+      0,
       common::errors::External(
           "Execute function CopyTensorByType failed by [%d]", r));
   sentenceIds_temp->set_lod(sentenceIds->lod());

paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc

Lines changed: 1 addition & 1 deletion

@@ -106,7 +106,7 @@ void DistributeFpnProposalsKernel(
     rois_lod_vec[i] = static_cast<int>(fpn_rois_lod[i]);
   }
   xpu::VectorParam<int> rois_lod = {
-      rois_lod_vec.data(), static_cast<int>(rois_lod_vec.size()), nullptr};
+      rois_lod_vec.data(), static_cast<int64_t>(rois_lod_vec.size()), nullptr};
 
   int r = xpu::distribute_fpn_proposals_helper<XPUType, int>(
       dev_ctx.x_context(),
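
The cast here tracks a widening of the length field in the XPU runtime's VectorParam helper. A rough sketch of the pattern, assuming a layout along the lines of {host pointer, length, device pointer} (the real definition lives in the Baidu XPU headers and may differ):

    #include <cstdint>
    #include <vector>

    // Assumed shape of the XDNN helper struct, for illustration only.
    template <typename T>
    struct VectorParamSketch {
      const T* cpu;  // host-side data, or nullptr
      int64_t len;   // element count, widened from int in newer toolkits
      T* xpu;        // device-side data, or nullptr
    };

    int main() {
      std::vector<int> rois_lod_vec = {0, 4, 9};
      // With len as int64_t, size_t -> int64_t is the lossless conversion;
      // the old static_cast<int> could truncate very large sizes.
      VectorParamSketch<int> rois_lod = {
          rois_lod_vec.data(), static_cast<int64_t>(rois_lod_vec.size()), nullptr};
      return rois_lod.len == 3 ? 0 : 1;
    }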

paddle/phi/kernels/xpu/elementwise.h

Lines changed: 0 additions & 2 deletions

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
-#ifdef PADDLE_WITH_XPU
 #include <algorithm>
 #include <string>
 #include <tuple>
@@ -218,4 +217,3 @@ void XPUElementwiseGrad(const XPUContext& dev_ctx,
 }
 
 }  // namespace phi
-#endif

paddle/phi/kernels/xpu/flash_attn_utils.h

Lines changed: 1 addition & 8 deletions

@@ -13,9 +13,6 @@
 // limitations under the License.
 
 #pragma once
-
-#ifdef PADDLE_WITH_XPU
-
 #include <vector>
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/common/memory_utils.h"
@@ -24,7 +21,6 @@
 namespace xfa = baidu::xpu::xfa;
 namespace phi {
 
-#ifdef PADDLE_WITH_XPU_XRE5
 using XPUTypeFP16 = typename XPUTypeTrait<phi::dtype::float16>::Type;
 using XPUTypeBF16 = typename XPUTypeTrait<phi::dtype::bfloat16>::Type;
 
@@ -87,8 +83,5 @@ static void GenerateRNGState(
     seed_offset_data[1] = static_cast<int64_t>(seed_offset_pair.second);
   }
 }
-
-#endif
-
 }  // namespace phi
-#endif
+#

paddle/phi/kernels/xpu/flatten2_grad_kernel.cc

Lines changed: 0 additions & 4 deletions

@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifdef PADDLE_WITH_XPU
-
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/flatten2_kernel_impl.h"
@@ -32,5 +30,3 @@ PD_REGISTER_KERNEL(flatten2_grad,
                    int8_t,
                    uint8_t,
                    bool) {}
-
-#endif

paddle/phi/kernels/xpu/flatten2_kernel.cc

Lines changed: 0 additions & 4 deletions

@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifdef PADDLE_WITH_XPU
-
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/flatten2_kernel_impl.h"
@@ -32,5 +30,3 @@ PD_REGISTER_KERNEL(flatten2,
                    int8_t,
                    uint8_t,
                    bool) {}
-
-#endif

paddle/phi/kernels/xpu/gather_grad_kernel.cc

Lines changed: 9 additions & 14 deletions

@@ -26,9 +26,9 @@ void GatherGradKernel(const Context& dev_ctx,
                       const DenseTensor& out_grad,
                       const Scalar& axis,
                       DenseTensor* x_grad) {
-  auto axis_v = axis.to<int>();
+  auto axis_v = axis.to<int64_t>();
   if (axis_v < 0) {
-    axis_v += static_cast<int>(x.dims().size());
+    axis_v += static_cast<int64_t>(x.dims().size());
   }
 
   const auto& index_type = index.dtype();
@@ -53,7 +53,7 @@ void GatherGradKernel(const Context& dev_ctx,
         "The index should be 0D or 1D, when it is not 2D, but we get %d",
         index_dims.size()));
   }
-  std::vector<int> xshape(x_grad->dims().size());
+  std::vector<int64_t> xshape(x_grad->dims().size());
   for (int i = 0; i < x_grad->dims().size(); ++i) {
     xshape[i] = x_grad->dims()[i];
   }
@@ -72,24 +72,19 @@ void GatherGradKernel(const Context& dev_ctx,
         index.dims().size() == 0 ? 1 : index.dims()[0],
         axis_v,
         false);
-  } else {
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    int* index_int_ptr_l3 = RAII_GUARD.alloc_l3_or_gm<int32_t>(index.numel());
-    r = xpu::cast<int64_t, int32_t>(dev_ctx.x_context(),
-                                    index.data<int64_t>(),
-                                    index_int_ptr_l3,
-                                    index.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-
-    r = xpu::gather_grad<XPUType, int>(
+  } else if (index_type == DataType::INT64) {
+    r = xpu::gather_grad<XPUType, int64_t>(
        dev_ctx.x_context(),
        reinterpret_cast<const XPUType*>(out_grad.data<T>()),
-        index_int_ptr_l3,
+        index.data<int64_t>(),
        reinterpret_cast<XPUType*>(x_grad->data<T>()),
        xshape,
        index.dims().size() == 0 ? 1 : index.dims()[0],
        axis_v,
        false);
+  } else {
+    PADDLE_THROW(common::errors::InvalidArgument("Unsupported index type: %s",
+                                                 DataTypeToString(index_type)));
  }
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "gather_grad");
 }
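
The new branch shape (one explicitly-typed branch per supported index dtype, plus a hard failure for anything else) replaces the old fallback that cast int64 indices down to int32 through a scratch buffer, which silently truncated any index above INT32_MAX. A generic standalone sketch of this dispatch pattern (hypothetical names, not Paddle's API):

    #include <cstdint>
    #include <stdexcept>

    enum class DType { INT32, INT64, FLOAT32 };

    template <typename IndexT>
    int GatherGradImpl(const IndexT* /*index*/, int64_t /*count*/) {
      return 0;  // stand-in for the typed device kernel call
    }

    // Dispatch on the runtime dtype; unsupported types fail loudly instead
    // of being narrowed to a smaller index type.
    int GatherGrad(DType index_type, const void* index, int64_t count) {
      if (index_type == DType::INT32) {
        return GatherGradImpl(static_cast<const int32_t*>(index), count);
      } else if (index_type == DType::INT64) {
        return GatherGradImpl(static_cast<const int64_t*>(index), count);
      }
      throw std::invalid_argument("Unsupported index type");
    }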

paddle/phi/kernels/xpu/gather_kernel.cc

Lines changed: 4 additions & 1 deletion

@@ -67,7 +67,7 @@ void GatherKernel(const Context& dev_ctx,
        xshape,
        index.dims().size() == 0 ? 1 : index.dims()[0],
        axis_v);
-  } else {
+  } else if (index_type == DataType::INT64) {
    r = xpu::paddle_gather<XPUType, int64_t>(
        dev_ctx.x_context(),
        reinterpret_cast<const XPUType*>(x.data<T>()),
@@ -76,6 +76,9 @@ void GatherKernel(const Context& dev_ctx,
        xshape,
        index.dims().size() == 0 ? 1 : index.dims()[0],
        axis_v);
+  } else {
+    PADDLE_THROW(common::errors::InvalidArgument("Unsupported index type: %s",
+                                                 DataTypeToString(index_type)));
  }
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");
 }

paddle/phi/kernels/xpu/gather_nd_grad_kernel.cc

Lines changed: 2 additions & 2 deletions

@@ -93,9 +93,9 @@ void GatherNdGradKernel(const Context &ctx,
     index_shape.insert(index_shape.begin(), 1);
   }
   xpu::VectorParam<int64_t> x_vec = {
-      x_shape.data(), static_cast<int>(x_shape.size()), nullptr};
+      x_shape.data(), static_cast<int64_t>(x_shape.size()), nullptr};
 
-  int index_size = static_cast<int>(index.numel());
+  int64_t index_size = index.numel();
   if (index_type == phi::DataType::INT32) {
     auto index_data = const_cast<int *>(index.data<int>());
     xpu::VectorParam<int> index_vec{nullptr, index_size, index_data};

paddle/phi/kernels/xpu/gather_nd_kernel.cc

Lines changed: 4 additions & 4 deletions

@@ -77,13 +77,13 @@ void GatherNdKernel(const Context &ctx,
                         DataType::INT32,
                         DataType::INT64));
 
-  auto x_shape = common::vectorize<int>(x.dims());
-  auto index_shape = common::vectorize<int>(index.dims());
+  auto x_shape = common::vectorize<int64_t>(x.dims());
+  auto index_shape = common::vectorize<int64_t>(index.dims());
   if (index_shape.size() == 1) {
     index_shape.insert(index_shape.begin(), 1);
   }
-  xpu::VectorParam<int> x_vec = {
-      x_shape.data(), static_cast<int>(x_shape.size()), nullptr};
+  xpu::VectorParam<int64_t> x_vec = {
+      x_shape.data(), static_cast<int64_t>(x_shape.size()), nullptr};
 
   int ret = 0;
 #ifndef PADDLE_WITH_XPU_PLUGIN

paddle/phi/kernels/xpu/generate_proposals_kernel.cc

Lines changed: 11 additions & 12 deletions

@@ -46,7 +46,7 @@ static void SortDescending(const XPUContext& dev_ctx,
   DenseTensor index_t;
   index_t.Resize({value.numel()});
   int* index = dev_ctx.template HostAlloc<int>(&index_t);
-  for (int i = 0; i < value.numel(); ++i) {
+  for (int64_t i = 0; i < value.numel(); ++i) {
     index[i] = i;
   }
 
@@ -104,26 +104,25 @@ std::pair<DenseTensor, DenseTensor> ProposalForOneImage(
       scores_slice.data<T>(),
       index_sort.data<int>(),
       scores_sel.data<T>(),
-      {static_cast<int>(scores_slice.numel()), 1},
+      {scores_slice.numel(), 1},
       index_sort.numel(),
       0);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");
 
-  r = xpu::paddle_gather<T>(
-      dev_ctx.x_context(),
-      bbox_deltas_slice.data<T>(),
-      index_sort.data<int>(),
-      bbox_sel.data<T>(),
-      {static_cast<int>(bbox_deltas_slice.numel()) / 4, 4},
-      index_sort.numel(),
-      0);
+  r = xpu::paddle_gather<T>(dev_ctx.x_context(),
+                            bbox_deltas_slice.data<T>(),
+                            index_sort.data<int>(),
+                            bbox_sel.data<T>(),
+                            {bbox_deltas_slice.numel() / 4, 4},
+                            index_sort.numel(),
+                            0);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");
 
   r = xpu::paddle_gather<T>(dev_ctx.x_context(),
                             anchors.data<T>(),
                             index_sort.data<int>(),
                             anchor_sel.data<T>(),
-                            {static_cast<int>(anchors.numel()) / 4, 4},
+                            {anchors.numel() / 4, 4},
                             index_sort.numel(),
                             0);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");
@@ -132,7 +131,7 @@ std::pair<DenseTensor, DenseTensor> ProposalForOneImage(
       variances.data<T>(),
       index_sort.data<int>(),
       var_sel.data<T>(),
-      {static_cast<int>(variances.numel()) / 4, 4},
+      {variances.numel() / 4, 4},
       index_sort.numel(),
       0);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");

paddle/phi/kernels/xpu/grid_sample_kernel.cc

Lines changed: 12 additions & 12 deletions

@@ -64,14 +64,14 @@ void GridSampleKernel(const Context& dev_ctx,
   const T* input_data = x.data<T>();
   const T* grid_data = grid.data<T>();
 
-  int n = x.dims()[0];
-  int c = x.dims()[1];
+  int64_t n = x.dims()[0];
+  int64_t c = x.dims()[1];
 
   if (x.dims().size() == 4) {  // 2D grid sample
-    int h = x.dims()[2];
-    int w = x.dims()[3];
-    int out_h = grid.dims()[1];
-    int out_w = grid.dims()[2];
+    int64_t h = x.dims()[2];
+    int64_t w = x.dims()[3];
+    int64_t out_h = grid.dims()[1];
+    int64_t out_w = grid.dims()[2];
 
     bool is_nchw_bool;
     if (data_format == "NCHW") {
@@ -104,12 +104,12 @@ void GridSampleKernel(const Context& dev_ctx,
         is_nchw_bool);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "grid_sampler");
   } else {  // 3D grid sample
-    int d = x.dims()[2];
-    int h = x.dims()[3];
-    int w = x.dims()[4];
-    int out_d = grid.dims()[1];
-    int out_h = grid.dims()[2];
-    int out_w = grid.dims()[3];
+    int64_t d = x.dims()[2];
+    int64_t h = x.dims()[3];
+    int64_t w = x.dims()[4];
+    int64_t out_d = grid.dims()[1];
+    int64_t out_h = grid.dims()[2];
+    int64_t out_w = grid.dims()[3];
 
     out->Resize(common::make_ddim({n, c, out_d, out_h, out_w}));
     T* output_data = dev_ctx.template Alloc<T>(out);

paddle/phi/kernels/xpu/group_norm_grad_kernel.cc

Lines changed: 12 additions & 11 deletions

@@ -47,19 +47,20 @@ void GroupNormGradKernel(const Context& dev_ctx,
   const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
   const auto scale_ptr = scale.get_ptr();
   const auto bias_ptr = bias.get_ptr();
-  const auto x_dims = common::vectorize<int>(x.dims());
-  const int N = x_dims[0];
+  const auto x_dims = common::vectorize<int64_t>(x.dims());
+  const int64_t N = x_dims[0];
   const bool channel_first =
       data_layout == DataLayout::kNCHW || data_layout == DataLayout::kNCDHW;
-  const int C = (channel_first ? x_dims[1] : x_dims[x_dims.size() - 1]);
-  const int L =
-      (channel_first
-           ? std::accumulate(
-                 x_dims.begin() + 2, x_dims.end(), 1, std::multiplies<int>())
-           : std::accumulate(x_dims.begin() + 1,
-                             x_dims.end() - 1,
-                             1,
-                             std::multiplies<int>()));
+  const int64_t C = (channel_first ? x_dims[1] : x_dims[x_dims.size() - 1]);
+  const int64_t L =
+      (channel_first ? std::accumulate(x_dims.begin() + 2,
+                                       x_dims.end(),
+                                       1,
+                                       std::multiplies<int64_t>())
+                     : std::accumulate(x_dims.begin() + 1,
+                                       x_dims.end() - 1,
+                                       1,
+                                       std::multiplies<int64_t>()));
 
   dev_ctx.template Alloc<T>(d_x);
   phi::funcs::SetConstant<XPUContext, T> set_zero;
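
One subtlety worth noting in this hunk: std::accumulate deduces its accumulator type from the init argument, not from the binary op, so with a plain int literal 1 the running product is still stored in an int between steps even though std::multiplies<int64_t> multiplies in 64 bits. Passing a 64-bit init value is the fully safe form. A small standalone demonstration (illustrative, not from the commit):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main() {
      // Dims whose product (2^33) does not fit in a 32-bit int.
      std::vector<int64_t> dims = {1 << 17, 1 << 16};

      // Pitfall: the accumulator's type comes from the init argument, so a
      // plain `1` keeps the running product in int; each int64_t result is
      // narrowed back, typically collapsing 2^33 to 0.
      auto truncated = std::accumulate(
          dims.begin(), dims.end(), 1, std::multiplies<int64_t>());

      // Safe: a 64-bit init keeps the whole accumulation in int64_t.
      auto full = std::accumulate(
          dims.begin(), dims.end(), int64_t{1}, std::multiplies<int64_t>());

      std::cout << truncated << " vs " << full << "\n";  // 0 vs 8589934592
      return 0;
    }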
