
Commit 48c699b

[XPU] fix index datatype: use int64 instead of int, part 2 (g-z)

1 parent 7c5f282
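For context on the whole series: Paddle tensors can exceed 2^31 - 1 elements, at which point any 32-bit index or element count silently wraps. A minimal standalone sketch of the failure mode (illustration only, not Paddle code):

#include <cstdint>
#include <iostream>

int main() {
  // A large but realistic activation: 8 x 256 x 256 x 256 x 64 elements.
  const int64_t dims[] = {8, 256, 256, 256, 64};

  int64_t numel = 1;
  for (int64_t d : dims) numel *= d;  // 8589934592 == 2^33, exact in int64_t

  // What a 32-bit index type sees: the count truncated modulo 2^32.
  const int32_t numel32 = static_cast<int32_t>(numel);

  std::cout << "int64_t numel: " << numel << "\n";    // 8589934592
  std::cout << "int32_t view:  " << numel32 << "\n";  // 0 -- all elements lost
}

Hence the mechanical int -> int64_t migration in the diffs below.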

File tree: 103 files changed, +765 -780 lines changed. (Large commit: some file diffs are hidden by default and not reproduced below.)


cmake/external/xpu.cmake

Lines changed: 2 additions & 0 deletions
@@ -29,6 +29,8 @@ set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
 set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
 set(XPU_XPUDNN_LIB_NAME "libxpu_dnn.so")
 set(XPU_FFT_LIB_NAME "libcufft.so")
+# Avoid deprecated int32 apis:
+add_compile_definitions(XPUAPI_NOT_INCLUDE_DEPRECATED)

 if(NOT DEFINED XPU_XHPC_BASE_DATE)
   set(XPU_XHPC_BASE_DATE "dev/20250417")
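add_compile_definitions injects XPUAPI_NOT_INCLUDE_DEPRECATED into every translation unit, so the XPU headers can compile out their legacy int32 entry points and any remaining 32-bit call site becomes a build error rather than a silent truncation. A hedged sketch of the guard pattern (hypothetical header; this commit does not show the real XPU SDK headers):

// Illustration only -- a made-up stand-in for an XPU SDK header.
#include <cstdint>
#include <iostream>
#include <vector>

#define XPUAPI_NOT_INCLUDE_DEPRECATED  // what add_compile_definitions injects

namespace fakexpu {
#ifndef XPUAPI_NOT_INCLUDE_DEPRECATED
// Legacy int32 overload: compiled out once the macro is defined.
inline int scatter(const std::vector<int>& shape) { return -1; }
#endif
// The int64 overload is then the only one visible, so call sites still
// passing std::vector<int> fail to compile instead of narrowing silently.
inline int scatter(const std::vector<int64_t>& shape) {
  return static_cast<int>(shape.size());
}
}  // namespace fakexpu

int main() {
  std::vector<int64_t> shape{2, 3};
  std::cout << fakexpu::scatter(shape) << "\n";  // 2
}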

paddle/phi/kernels/funcs/selected_rows_functor.cc

Lines changed: 4 additions & 4 deletions
@@ -358,7 +358,7 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
     auto& in1_rows = input1.rows();
     int64_t* in1_rows_data = nullptr;
     xpu::VectorParam<int64_t> in1_rows_vec{
-        in1_rows.data(), static_cast<int>(in1_rows.size()), in1_rows_data};
+        in1_rows.data(), static_cast<int64_t>(in1_rows.size()), in1_rows_data};

     int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
     PADDLE_ENFORCE_EQ(
@@ -373,9 +373,9 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
     auto* in1_data = in1_value.data<T>();
     auto* out_data = input2->data<T>();

-    int h = in1_rows.size();
-    int w = in1_row_numel;
-    const std::vector<int> xshape{h, w};
+    int64_t h = in1_rows.size();
+    int64_t w = in1_row_numel;
+    const std::vector<int64_t> xshape{h, w};

     int r = xpu::scatter<XPUType, int64_t>(
         context.x_context(),
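The subtle point in the first hunk: the VectorParam field is already 64-bit, so the old static_cast<int> truncated the row count and then implicitly widened the truncated value back. A minimal sketch with a stand-in struct (hypothetical; the real xpu::VectorParam is defined in the XPU headers):

#include <cstdint>
#include <iostream>

// Stand-in for xpu::VectorParam<int64_t>: {host data, length, device data}.
struct VecParam64 {
  const int64_t* cpu;
  int64_t len;
  int64_t* xpu;
};

int main() {
  const int64_t kRows = (1LL << 31) + 7;  // row count just past INT_MAX

  // Old path: truncate to int, then implicitly widen the garbage back.
  VecParam64 bad{nullptr, static_cast<int>(kRows), nullptr};
  // Fixed path: keep the count 64-bit end to end.
  VecParam64 good{nullptr, static_cast<int64_t>(kRows), nullptr};

  std::cout << bad.len << " vs " << good.len << "\n";
  // Typically prints: -2147483641 vs 2147483655
}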

paddle/phi/kernels/funcs/unfold_functor.h

Lines changed: 5 additions & 8 deletions
@@ -18,14 +18,11 @@ namespace phi {
 namespace funcs {

 //////// CalcOutputSize Functor ///////
-inline int CalcOutputSize(int input_size,
-                          int filter_size,
-                          int dilation,
-                          int padding1,
-                          int padding2,
-                          int stride) {
-  const int dkernel = dilation * (filter_size - 1) + 1;
-  int output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
+template <typename T = int>
+inline T CalcOutputSize(
+    T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
+  const T dkernel = dilation * (filter_size - 1) + 1;
+  T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
   return input_size == -1 ? -1 : output_size;
 }
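Because the template parameter defaults to int, every existing caller compiles unchanged, while XPU call sites can instantiate the 64-bit variant explicitly. A usage sketch against the helper exactly as rewritten above:

#include <cstdint>
#include <iostream>

// Verbatim copy of the templated helper from unfold_functor.h above.
template <typename T = int>
inline T CalcOutputSize(
    T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
  const T dkernel = dilation * (filter_size - 1) + 1;
  T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
  return input_size == -1 ? -1 : output_size;
}

int main() {
  // Legacy callers: T is deduced as int, behavior unchanged.
  std::cout << CalcOutputSize(224, 3, 1, 1, 1, 1) << "\n";  // 224

  // 64-bit callers: explicit instantiation, no overflow for huge extents.
  std::cout << CalcOutputSize<int64_t>(1LL << 32, 3, 1, 1, 1, 1) << "\n";
}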

paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc

Lines changed: 21 additions & 23 deletions
@@ -37,9 +37,9 @@ void Conv2dXPUKernelImpl(const Context& ctx,
     const paddle::optional<DenseTensor>& branch_max,
     const paddle::optional<DenseTensor>& scale_max,
     const paddle::optional<DenseTensor>& out_max_in,
-    const std::vector<int>& paddings,
-    const std::vector<int>& dilations,
-    const std::vector<int>& strides,
+    const std::vector<int>& paddings_,
+    const std::vector<int>& dilations_,
+    const std::vector<int>& strides_,
     const std::string& padding_algorithm,
     int groups,
     int act_type,
@@ -52,26 +52,23 @@ void Conv2dXPUKernelImpl(const Context& ctx,
   auto input_dims = x.dims();
   auto filter_dims = filter.dims();
   // update paddings and dilations according to padding_algorithm
-  std::vector<int> paddings_vec = paddings;
-  std::vector<int> dilations_vec = dilations;
+  std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
+  std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
+  std::vector<int64_t> strides(strides_.begin(), strides_.end());
   DDim in_data_dims = common::slice_ddim(input_dims, 2, input_dims.size());
   DDim filter_data_dims =
       common::slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
-  phi::UpdatePaddingAndDilation(&paddings_vec,
-                                &dilations_vec,
-                                padding_algorithm,
-                                in_data_dims,
-                                strides,
-                                ksize);
+  std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
+  phi::UpdatePaddingAndDilation(
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);

-  int batch = static_cast<int>(input_dims[0]);
-  int in_c = static_cast<int>(input_dims[1]);
-  int in_h = static_cast<int>(input_dims[2]);
-  int in_w = static_cast<int>(input_dims[3]);
-  int out_c = static_cast<int>(filter_dims[0]);
-  int win_h = static_cast<int>(filter_dims[2]);
-  int win_w = static_cast<int>(filter_dims[3]);
+  int64_t batch = input_dims[0];
+  int64_t in_c = input_dims[1];
+  int64_t in_h = input_dims[2];
+  int64_t in_w = input_dims[3];
+  int64_t out_c = filter_dims[0];
+  int64_t win_h = filter_dims[2];
+  int64_t win_w = filter_dims[3];
   auto* input_data = reinterpret_cast<const XPUTypeX*>(x.data<T_X>());
   const float* input_max_data =
       x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data<float>();
@@ -130,10 +127,11 @@ void Conv2dXPUKernelImpl(const Context& ctx,
       /* int64_t h */ in_h,
       /* int64_t w */ in_w,
       /* int64_t oc */ out_c,
-      /* const std::vector<int>& ksize */ std::vector<int>{win_h, win_w},
-      /* const std::vector<int>& strides */ strides,
-      /* const std::vector<int>& paddings */ paddings_vec,
-      /* const std::vector<int>& dilations */ dilations_vec,
+      /* const std::vector<int64_t>& ksize */
+      std::vector<int64_t>{win_h, win_w},
+      /* const std::vector<int64_t>& strides */ strides,
+      /* const std::vector<int64_t>& paddings */ paddings,
+      /* const std::vector<int64_t>& dilations */ dilations,
       /* int64_t groups */ groups,
       /* const float* in_maxptr */ input_max_data,
       /* const float* filter_maxptr */ filter_max_data,
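The recurring widening idiom in this file (and most kernels below) is the iterator-range constructor, which converts each int element to int64_t; since int -> int64_t is always lossless, a blanket copy at the kernel boundary is safe, whereas the reverse direction would need range checks. Distilled into a helper (hypothetical name; the kernels inline it):

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical helper for the inline pattern
//   std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
std::vector<int64_t> WidenToInt64(const std::vector<int>& v) {
  return std::vector<int64_t>(v.begin(), v.end());  // element-wise widening
}

int main() {
  std::vector<int> paddings_{1, 1};  // framework attribute, still 32-bit
  std::vector<int64_t> paddings = WidenToInt64(paddings_);  // XPU-facing copy
  for (int64_t p : paddings) std::cout << p << " ";  // 1 1
  std::cout << "\n";
}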

paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc

Lines changed: 15 additions & 14 deletions
@@ -26,13 +26,13 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
     const DenseTensor& filter,
     const DenseTensor& filter_max,
     const paddle::optional<DenseTensor>& bias,
-    const std::vector<int>& strides,
-    const std::vector<int>& paddings,
+    const std::vector<int>& strides_,
+    const std::vector<int>& paddings_,
     const std::vector<int>& output_padding,
     const IntArray& output_size,
     const std::string& padding_algorithm,
     int groups,
-    const std::vector<int>& dilations,
+    const std::vector<int>& dilations_,
     const std::string& data_format,
     bool has_bias,
     bool with_act,
@@ -48,17 +48,18 @@ void Conv2dTransposeXPUKernel(const Context& ctx,

   DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size());  // hw
   DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size());
-  std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
-  std::vector<int> paddings_ = paddings;
-  std::vector<int> dilations_ = dilations;
+  std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
+  std::vector<int64_t> strides(strides_.begin(), strides_.end());
+  std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
+  std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
   UpdatePaddingAndDilation(
-      &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);

-  const int batch_size = static_cast<int>(x.dims()[0]);
-  const int img_yc = static_cast<int>(x.dims()[1]);
-  const int img_xc = static_cast<int>(out->dims()[1]);
-  const int img_xh = static_cast<int>(out->dims()[2]);
-  const int img_xw = static_cast<int>(out->dims()[3]);
+  const int64_t batch_size = x.dims()[0];
+  const int64_t img_yc = x.dims()[1];
+  const int64_t img_xc = out->dims()[1];
+  const int64_t img_xh = out->dims()[2];
+  const int64_t img_xw = out->dims()[3];
   auto act = xpu::Activation_t::LINEAR;
   if (with_act) {
     if (act_type == "relu") {
@@ -83,8 +84,8 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
       img_xc,
       ksize,
       strides,
-      paddings_,
-      dilations_,
+      paddings,
+      dilations,
       groups,
       x_max_data,
       filter_max_data,
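Note the renaming convention shared by both conv kernels: the public signature keeps const std::vector<int>& (the attribute type registered with the framework) but appends an underscore to the parameter names, so the widened int64_t locals can reclaim the original names and the body needs no further edits. A skeletal sketch of the convention:

#include <cstdint>
#include <vector>

// Public signature unchanged (int attributes); 64-bit inside.
void KernelImpl(const std::vector<int>& strides_,    // parameter renamed
                const std::vector<int>& paddings_) {
  // The widened locals reclaim the original names...
  std::vector<int64_t> strides(strides_.begin(), strides_.end());
  std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
  // ...so downstream code keeps using `strides` / `paddings`, now 64-bit.
  (void)strides;
  (void)paddings;
}

int main() { KernelImpl({2, 2}, {1, 1}); }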

paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc

Lines changed: 3 additions & 3 deletions
@@ -30,9 +30,9 @@ void FastWhereXPUKernel(const Context& ctx,
   auto* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
   auto* y_data = reinterpret_cast<const XPUType*>(y.data<T>());
   auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
-  auto condition_dims = common::vectorize<int>(condition.dims());
-  auto x_dims = common::vectorize<int>(x.dims());
-  auto y_dims = common::vectorize<int>(y.dims());
+  auto condition_dims = common::vectorize<int64_t>(condition.dims());
+  auto x_dims = common::vectorize<int64_t>(x.dims());
+  auto y_dims = common::vectorize<int64_t>(y.dims());
   PADDLE_ENFORCE_EQ(
       x_dims,
       y_dims,
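common::vectorize<T> flattens a DDim into std::vector<T>; DDim extents are already 64-bit, so vectorizing to int64_t is the lossless direction and vectorize<int> was the narrowing one. A rough stand-in showing the difference (hypothetical; Paddle's real DDim/vectorize live under common/):

#include <cstdint>
#include <iostream>
#include <vector>

// Rough stand-in for common::vectorize<T>(const DDim&); a DDim is modeled
// here as a plain vector of 64-bit extents.
template <typename T>
std::vector<T> Vectorize(const std::vector<int64_t>& ddim) {
  return std::vector<T>(ddim.begin(), ddim.end());  // narrows when T is int
}

int main() {
  std::vector<int64_t> dims{1, 3, 1LL << 31, 1};  // one extent past INT_MAX
  auto d64 = Vectorize<int64_t>(dims);  // exact
  auto d32 = Vectorize<int>(dims);      // extent 2 typically wraps negative
  std::cout << d64[2] << " vs " << d32[2] << "\n";
}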

paddle/phi/kernels/fusion/xpu/pad2d_xpu_kernel.cc

Lines changed: 7 additions & 7 deletions
@@ -19,13 +19,13 @@ namespace fusion {
 template <typename T, typename Context>
 void Pad2dXPUKernel(const Context& dev_ctx,
                     const DenseTensor& x,
-                    const std::vector<int>& paddings,
+                    const std::vector<int>& paddings_,
                     const std::string& mode,
                     float pad_value,
                     const std::string& data_format,
                     DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  std::vector<int> pads = paddings;
+  std::vector<int64_t> pads(paddings_.begin(), paddings_.end());

   auto in_dims = x.dims();
   const T* in_data = x.data<T>();
@@ -48,10 +48,10 @@ void Pad2dXPUKernel(const Context& dev_ctx,
   }

   T* out_data = dev_ctx.template Alloc<T>(out);
-  const int num = in_dims[0];  // n
-  int channels = in_dims[1];   // c
-  int in_height = in_dims[2];  // xh
-  int in_width = in_dims[3];   // xw
+  const int64_t num = in_dims[0];  // n
+  int64_t channels = in_dims[1];   // c
+  int64_t in_height = in_dims[2];  // xh
+  int64_t in_width = in_dims[3];   // xw
   if (data_format == "NHWC") {
     in_height = in_dims[1];  // xh
     in_width = in_dims[2];   // xw
@@ -111,7 +111,7 @@ void Pad2dXPUKernel(const Context& dev_ctx,
   }

   // set pad3d's pads to pad2d's pads_xpu
-  std::vector<int> pads_xpu(4);
+  std::vector<int64_t> pads_xpu(4);
   pads_xpu[0] = pads[2];  // pt
   pads_xpu[1] = pads[3];  // pd
   pads_xpu[2] = pads[0];  // pl
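The final hunk reorders the pad3d-style attribute {pl, pr, pt, pd} into the {pt, pd, pl, pr} order the pad2d XPU call expects; the fourth assignment is cut off in the rendered diff, so the pr line below is an assumption that follows the visible pattern. A tiny sketch of the mapping:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Attribute order: {pl, pr, pt, pd} = {left, right, top, bottom}.
  std::vector<int64_t> pads{10, 11, 20, 21};

  // Reorder to {pt, pd, pl, pr} as in the kernel above.
  std::vector<int64_t> pads_xpu(4);
  pads_xpu[0] = pads[2];  // pt
  pads_xpu[1] = pads[3];  // pd
  pads_xpu[2] = pads[0];  // pl
  pads_xpu[3] = pads[1];  // pr (assumed; this line is truncated in the diff)

  for (int64_t p : pads_xpu) std::cout << p << " ";  // 20 21 10 11
  std::cout << "\n";
}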

paddle/phi/kernels/fusion/xpu/resnet_basic_block_grad_kernel.cc

Lines changed: 62 additions & 62 deletions
@@ -51,16 +51,16 @@ class ResnetBasicBlockGradAttr {
       const DenseTensor &max_filter3,
       const DenseTensor &out,
       const DenseTensor &out_grad,
-      int stride1_in,
-      int stride2_in,
-      int stride3_in,
-      int padding1_in,
-      int padding2_in,
-      int padding3_in,
-      int dilation1_in,
-      int dilation2_in,
-      int dilation3_in,
-      int group_in,
+      int64_t stride1_in,
+      int64_t stride2_in,
+      int64_t stride3_in,
+      int64_t padding1_in,
+      int64_t padding2_in,
+      int64_t padding3_in,
+      int64_t dilation1_in,
+      int64_t dilation2_in,
+      int64_t dilation3_in,
+      int64_t group_in,
       float momentum_in,
       float epsilon_in,
       const std::string &data_format_in,
@@ -100,65 +100,65 @@ class ResnetBasicBlockGradAttr {
     auto conv1_out = &conv1_in;
     auto filter2 = &filter2_in;
     auto conv2_out = &conv2_in;
-    conv1_input_shape = common::vectorize<int>(input1->dims());
-    conv1_output_shape = common::vectorize<int>(conv1_out->dims());
-    conv1_filter_shape = common::vectorize<int>(filter1->dims());
+    conv1_input_shape = common::vectorize<int64_t>(input1->dims());
+    conv1_output_shape = common::vectorize<int64_t>(conv1_out->dims());
+    conv1_filter_shape = common::vectorize<int64_t>(filter1->dims());
     conv1_filter_numel = filter1->numel();
     conv1_input_numel = input1->numel();
     conv1_output_numel = conv1_out->numel();

-    conv2_input_shape = common::vectorize<int>(conv1_out->dims());
-    conv2_output_shape = common::vectorize<int>(conv2_out->dims());
-    conv2_filter_shape = common::vectorize<int>(filter2->dims());
+    conv2_input_shape = common::vectorize<int64_t>(conv1_out->dims());
+    conv2_output_shape = common::vectorize<int64_t>(conv2_out->dims());
+    conv2_filter_shape = common::vectorize<int64_t>(filter2->dims());
     conv2_filter_numel = filter2->numel();
     conv2_input_numel = conv1_out->numel();
     conv2_output_numel = conv2_out->numel();

     if (has_shortcut) {
       auto filter3 = filter3_in.get_ptr();
       auto conv3_out = conv3_in.get_ptr();
-      conv3_input_shape = common::vectorize<int>(input1->dims());
-      conv3_output_shape = common::vectorize<int>(conv3_out->dims());
-      conv3_filter_shape = common::vectorize<int>(filter3->dims());
+      conv3_input_shape = common::vectorize<int64_t>(input1->dims());
+      conv3_output_shape = common::vectorize<int64_t>(conv3_out->dims());
+      conv3_filter_shape = common::vectorize<int64_t>(filter3->dims());
       conv3_filter_numel = filter3->numel();
       conv3_input_numel = input1->numel();
       conv3_output_numel = conv3_out->numel();
     }
   }

-  int padding1;
-  int padding2;
-  int padding3;
-  int stride1;
-  int stride2;
-  int stride3;
-  int dilation1;
-  int dilation2;
-  int dilation3;
-  int group;
+  int64_t padding1;
+  int64_t padding2;
+  int64_t padding3;
+  int64_t stride1;
+  int64_t stride2;
+  int64_t stride3;
+  int64_t dilation1;
+  int64_t dilation2;
+  int64_t dilation3;
+  int64_t group;

   bool has_shortcut;
   bool find_max;

-  std::vector<int> conv1_input_shape;
-  std::vector<int> conv1_output_shape;
-  std::vector<int> conv1_filter_shape;
-  std::vector<int> conv2_input_shape;
-  std::vector<int> conv2_output_shape;
-  std::vector<int> conv2_filter_shape;
-  std::vector<int> conv3_input_shape;
-  std::vector<int> conv3_output_shape;
-  std::vector<int> conv3_filter_shape;
-
-  int conv1_filter_numel;
-  int conv2_filter_numel;
-  int conv3_filter_numel;
-  int conv1_input_numel;
-  int conv2_input_numel;
-  int conv3_input_numel;
-  int conv1_output_numel;
-  int conv2_output_numel;
-  int conv3_output_numel;
+  std::vector<int64_t> conv1_input_shape;
+  std::vector<int64_t> conv1_output_shape;
+  std::vector<int64_t> conv1_filter_shape;
+  std::vector<int64_t> conv2_input_shape;
+  std::vector<int64_t> conv2_output_shape;
+  std::vector<int64_t> conv2_filter_shape;
+  std::vector<int64_t> conv3_input_shape;
+  std::vector<int64_t> conv3_output_shape;
+  std::vector<int64_t> conv3_filter_shape;
+
+  int64_t conv1_filter_numel;
+  int64_t conv2_filter_numel;
+  int64_t conv3_filter_numel;
+  int64_t conv1_input_numel;
+  int64_t conv2_input_numel;
+  int64_t conv3_input_numel;
+  int64_t conv1_output_numel;
+  int64_t conv2_output_numel;
+  int64_t conv3_output_numel;
 };

 template <typename T>
@@ -170,20 +170,20 @@ static inline void xpu_conv2d_grad(xpu::Context *ctx,
                                    T *filter_grad_data,
                                    const float *input_max_data,
                                    const float *filter_max_data,
-                                   const std::vector<int> &input_shape,
-                                   const std::vector<int> &filter_shape,
-                                   int padding,
-                                   int stride,
-                                   int dilation,
-                                   int group) {
-  std::vector<int> ksize{filter_shape[2], filter_shape[3]};
-  std::vector<int> stride_vec{stride, stride};
-  std::vector<int> dilation_vec{dilation, dilation};
-  std::vector<int> padding_vec{padding, padding};
-  int N = input_shape[0];
-  int C = input_shape[1];
-  int H = input_shape[2];
-  int W = input_shape[3];
+                                   const std::vector<int64_t> &input_shape,
+                                   const std::vector<int64_t> &filter_shape,
+                                   int64_t padding,
+                                   int64_t stride,
+                                   int64_t dilation,
+                                   int64_t group) {
+  std::vector<int64_t> ksize{filter_shape[2], filter_shape[3]};
+  std::vector<int64_t> stride_vec{stride, stride};
+  std::vector<int64_t> dilation_vec{dilation, dilation};
+  std::vector<int64_t> padding_vec{padding, padding};
+  int64_t N = input_shape[0];
+  int64_t C = input_shape[1];
+  int64_t H = input_shape[2];
+  int64_t W = input_shape[3];

   int r = xpu::conv2d_grad<T, T, T, int16_t>(ctx,
                                              input_data,
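xpu_conv2d_grad takes one scalar per attribute and broadcasts it to both spatial axes, so widening the scalar members of ResnetBasicBlockGradAttr to int64_t automatically widens every derived vector. A compact sketch of that broadcast (same square-kernel assumption as the helper above):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // One scalar per attribute, as stored in ResnetBasicBlockGradAttr.
  const int64_t stride = 2, dilation = 1, padding = 1;

  // Broadcast each scalar to both spatial axes, exactly as xpu_conv2d_grad
  // builds stride_vec / dilation_vec / padding_vec.
  std::vector<int64_t> stride_vec{stride, stride};
  std::vector<int64_t> dilation_vec{dilation, dilation};
  std::vector<int64_t> padding_vec{padding, padding};

  std::cout << stride_vec[0] << "x" << stride_vec[1] << " stride, "
            << padding_vec[0] << " pad, " << dilation_vec[0] << " dilation\n";
}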
