Skip to content

Commit 331d556

Browse files
authored
[XPU] fix index's datatype, using int64 instead of int, part 2 (g-z) (#72519)
1 parent 16800d1 commit 331d556

File tree

104 files changed

+761
-788
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+761
-788
lines changed

cmake/external/xpu.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
2929
set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
3030
set(XPU_XPUDNN_LIB_NAME "libxpu_dnn.so")
3131
set(XPU_FFT_LIB_NAME "libcufft.so")
32+
# Avoid deprecated int32 APIs:
33+
add_compile_definitions(XPUAPI_NOT_INCLUDE_DEPRECATED)
3234

3335
if(NOT DEFINED XPU_XHPC_BASE_DATE)
3436
set(XPU_XHPC_BASE_DATE "dev/20250417")

paddle/phi/backends/xpu/xpu3_op_list.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,7 @@ XPUOpMap& get_kl3_ops() {
10751075
phi::DataType::BFLOAT16})},
10761076
{"pow2_decay_with_linear_warmup", XPUKernelSet({phi::DataType::FLOAT32})},
10771077
{"prior_box", XPUKernelSet({phi::DataType::FLOAT32})},
1078+
{"prelu", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
10781079
{"prelu_grad",
10791080
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
10801081
{"prod_raw", XPUKernelSet({phi::DataType::FLOAT32})},

paddle/phi/kernels/funcs/selected_rows_functor.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
358358
auto& in1_rows = input1.rows();
359359
int64_t* in1_rows_data = nullptr;
360360
xpu::VectorParam<int64_t> in1_rows_vec{
361-
in1_rows.data(), static_cast<int>(in1_rows.size()), in1_rows_data};
361+
in1_rows.data(), static_cast<int64_t>(in1_rows.size()), in1_rows_data};
362362

363363
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
364364
PADDLE_ENFORCE_EQ(
@@ -373,9 +373,9 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
373373
auto* in1_data = in1_value.data<T>();
374374
auto* out_data = input2->data<T>();
375375

376-
int h = in1_rows.size();
377-
int w = in1_row_numel;
378-
const std::vector<int> xshape{h, w};
376+
int64_t h = in1_rows.size();
377+
int64_t w = in1_row_numel;
378+
const std::vector<int64_t> xshape{h, w};
379379

380380
int r = xpu::scatter<XPUType, int64_t>(
381381
context.x_context(),

paddle/phi/kernels/funcs/unfold_functor.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,11 @@ namespace phi {
1818
namespace funcs {
1919

2020
//////// CalcOutputSize Functor ///////
21-
inline int CalcOutputSize(int input_size,
22-
int filter_size,
23-
int dilation,
24-
int padding1,
25-
int padding2,
26-
int stride) {
27-
const int dkernel = dilation * (filter_size - 1) + 1;
28-
int output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
21+
template <typename T = int>
22+
inline T CalcOutputSize(
23+
T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
24+
const T dkernel = dilation * (filter_size - 1) + 1;
25+
T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
2926
return input_size == -1 ? -1 : output_size;
3027
}
3128

paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ void Conv2dXPUKernelImpl(const Context& ctx,
3737
const paddle::optional<DenseTensor>& branch_max,
3838
const paddle::optional<DenseTensor>& scale_max,
3939
const paddle::optional<DenseTensor>& out_max_in,
40-
const std::vector<int>& paddings,
41-
const std::vector<int>& dilations,
42-
const std::vector<int>& strides,
40+
const std::vector<int>& paddings_,
41+
const std::vector<int>& dilations_,
42+
const std::vector<int>& strides_,
4343
const std::string& padding_algorithm,
4444
int groups,
4545
int act_type,
@@ -52,26 +52,23 @@ void Conv2dXPUKernelImpl(const Context& ctx,
5252
auto input_dims = x.dims();
5353
auto filter_dims = filter.dims();
5454
// update paddings and dilations according to padding_algorithm
55-
std::vector<int> paddings_vec = paddings;
56-
std::vector<int> dilations_vec = dilations;
55+
std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
56+
std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
57+
std::vector<int64_t> strides(strides_.begin(), strides_.end());
5758
DDim in_data_dims = common::slice_ddim(input_dims, 2, input_dims.size());
5859
DDim filter_data_dims =
5960
common::slice_ddim(filter_dims, 2, filter_dims.size());
60-
std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
61-
phi::UpdatePaddingAndDilation(&paddings_vec,
62-
&dilations_vec,
63-
padding_algorithm,
64-
in_data_dims,
65-
strides,
66-
ksize);
61+
std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
62+
phi::UpdatePaddingAndDilation(
63+
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
6764

68-
int batch = static_cast<int>(input_dims[0]);
69-
int in_c = static_cast<int>(input_dims[1]);
70-
int in_h = static_cast<int>(input_dims[2]);
71-
int in_w = static_cast<int>(input_dims[3]);
72-
int out_c = static_cast<int>(filter_dims[0]);
73-
int win_h = static_cast<int>(filter_dims[2]);
74-
int win_w = static_cast<int>(filter_dims[3]);
65+
int64_t batch = input_dims[0];
66+
int64_t in_c = input_dims[1];
67+
int64_t in_h = input_dims[2];
68+
int64_t in_w = input_dims[3];
69+
int64_t out_c = filter_dims[0];
70+
int64_t win_h = filter_dims[2];
71+
int64_t win_w = filter_dims[3];
7572
auto* input_data = reinterpret_cast<const XPUTypeX*>(x.data<T_X>());
7673
const float* input_max_data =
7774
x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data<float>();
@@ -130,10 +127,11 @@ void Conv2dXPUKernelImpl(const Context& ctx,
130127
/* int64_t h */ in_h,
131128
/* int64_t w */ in_w,
132129
/* int64_t oc */ out_c,
133-
/* const std::vector<int>& ksize */ std::vector<int>{win_h, win_w},
134-
/* const std::vector<int>& strides */ strides,
135-
/* const std::vector<int>& paddings */ paddings_vec,
136-
/* const std::vector<int>& dilations */ dilations_vec,
130+
/* const std::vector<int64_t>& ksize */
131+
std::vector<int64_t>{win_h, win_w},
132+
/* const std::vector<int64_t>& strides */ strides,
133+
/* const std::vector<int64_t>& paddings */ paddings,
134+
/* const std::vector<int64_t>& dilations */ dilations,
137135
/* int64_t groups */ groups,
138136
/* const float* in_maxptr */ input_max_data,
139137
/* const float* filter_maxptr */ filter_max_data,

paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
2626
const DenseTensor& filter,
2727
const DenseTensor& filter_max,
2828
const paddle::optional<DenseTensor>& bias,
29-
const std::vector<int>& strides,
30-
const std::vector<int>& paddings,
29+
const std::vector<int>& strides_,
30+
const std::vector<int>& paddings_,
3131
const std::vector<int>& output_padding,
3232
const IntArray& output_size,
3333
const std::string& padding_algorithm,
3434
int groups,
35-
const std::vector<int>& dilations,
35+
const std::vector<int>& dilations_,
3636
const std::string& data_format,
3737
bool has_bias,
3838
bool with_act,
@@ -48,17 +48,18 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
4848

4949
DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size()); // hw
5050
DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size());
51-
std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
52-
std::vector<int> paddings_ = paddings;
53-
std::vector<int> dilations_ = dilations;
51+
std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
52+
std::vector<int64_t> strides(strides_.begin(), strides_.end());
53+
std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
54+
std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
5455
UpdatePaddingAndDilation(
55-
&paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
56+
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
5657

57-
const int batch_size = static_cast<int>(x.dims()[0]);
58-
const int img_yc = static_cast<int>(x.dims()[1]);
59-
const int img_xc = static_cast<int>(out->dims()[1]);
60-
const int img_xh = static_cast<int>(out->dims()[2]);
61-
const int img_xw = static_cast<int>(out->dims()[3]);
58+
const int64_t batch_size = x.dims()[0];
59+
const int64_t img_yc = x.dims()[1];
60+
const int64_t img_xc = out->dims()[1];
61+
const int64_t img_xh = out->dims()[2];
62+
const int64_t img_xw = out->dims()[3];
6263
auto act = xpu::Activation_t::LINEAR;
6364
if (with_act) {
6465
if (act_type == "relu") {
@@ -83,8 +84,8 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
8384
img_xc,
8485
ksize,
8586
strides,
86-
paddings_,
87-
dilations_,
87+
paddings,
88+
dilations,
8889
groups,
8990
x_max_data,
9091
filter_max_data,

paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ void FastWhereXPUKernel(const Context& ctx,
3030
auto* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
3131
auto* y_data = reinterpret_cast<const XPUType*>(y.data<T>());
3232
auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
33-
auto condition_dims = common::vectorize<int>(condition.dims());
34-
auto x_dims = common::vectorize<int>(x.dims());
35-
auto y_dims = common::vectorize<int>(y.dims());
33+
auto condition_dims = common::vectorize<int64_t>(condition.dims());
34+
auto x_dims = common::vectorize<int64_t>(x.dims());
35+
auto y_dims = common::vectorize<int64_t>(y.dims());
3636
PADDLE_ENFORCE_EQ(
3737
x_dims,
3838
y_dims,

paddle/phi/kernels/fusion/xpu/pad2d_xpu_kernel.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ namespace fusion {
1919
template <typename T, typename Context>
2020
void Pad2dXPUKernel(const Context& dev_ctx,
2121
const DenseTensor& x,
22-
const std::vector<int>& paddings,
22+
const std::vector<int>& paddings_,
2323
const std::string& mode,
2424
float pad_value,
2525
const std::string& data_format,
2626
DenseTensor* out) {
2727
using XPUType = typename XPUTypeTrait<T>::Type;
28-
std::vector<int> pads = paddings;
28+
std::vector<int64_t> pads(paddings_.begin(), paddings_.end());
2929

3030
auto in_dims = x.dims();
3131
const T* in_data = x.data<T>();
@@ -48,10 +48,10 @@ void Pad2dXPUKernel(const Context& dev_ctx,
4848
}
4949

5050
T* out_data = dev_ctx.template Alloc<T>(out);
51-
const int num = in_dims[0]; // n
52-
int channels = in_dims[1]; // c
53-
int in_height = in_dims[2]; // xh
54-
int in_width = in_dims[3]; // xw
51+
const int64_t num = in_dims[0]; // n
52+
int64_t channels = in_dims[1]; // c
53+
int64_t in_height = in_dims[2]; // xh
54+
int64_t in_width = in_dims[3]; // xw
5555
if (data_format == "NHWC") {
5656
in_height = in_dims[1]; // xh
5757
in_width = in_dims[2]; // xw
@@ -111,7 +111,7 @@ void Pad2dXPUKernel(const Context& dev_ctx,
111111
}
112112

113113
// set pad3d's pads to pad2d's pads_xpu
114-
std::vector<int> pads_xpu(4);
114+
std::vector<int64_t> pads_xpu(4);
115115
pads_xpu[0] = pads[2]; // pt
116116
pads_xpu[1] = pads[3]; // pd
117117
pads_xpu[2] = pads[0]; // pl

0 commit comments

Comments
 (0)