[XPU] fix index's datatype, using int64 instead of int, part 2 (g-z) #72519

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit, May 13, 2025
2 changes: 2 additions & 0 deletions cmake/external/xpu.cmake
@@ -29,6 +29,8 @@ set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
 set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
 set(XPU_XPUDNN_LIB_NAME "libxpu_dnn.so")
 set(XPU_FFT_LIB_NAME "libcufft.so")
+# Avoid deprecated int32 apis:
+add_compile_definitions(XPUAPI_NOT_INCLUDE_DEPRECATED)
Contributor Author: Stop including the legacy int32 interfaces; see internal tracking card xpu-paddlepaddle-864.


 if(NOT DEFINED XPU_XHPC_BASE_DATE)
   set(XPU_XHPC_BASE_DATE "dev/20250417")
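Defining XPUAPI_NOT_INCLUDE_DEPRECATED for the whole build is what forces any remaining int32 call sites to surface as compile errors. A minimal sketch of the guard pattern this presumably relies on inside the XPU headers (the header and function below are illustrative stand-ins, not the real XPU API):

```cpp
// illustrative_xpu_api.h -- a stand-in, not the actual XPU header
#include <cstdint>

namespace xpu {

// The 64-bit interface is always declared.
int scatter(const int64_t* index, int64_t index_len);

#ifndef XPUAPI_NOT_INCLUDE_DEPRECATED
// Legacy 32-bit overload: once the macro is defined at build time,
// this declaration disappears and stale int32 callers fail to compile.
int scatter(const int* index, int index_len);
#endif

}  // namespace xpu
```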
1 change: 1 addition & 0 deletions paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -1075,6 +1075,7 @@ XPUOpMap& get_kl3_ops() {
                     phi::DataType::BFLOAT16})},
     {"pow2_decay_with_linear_warmup", XPUKernelSet({phi::DataType::FLOAT32})},
     {"prior_box", XPUKernelSet({phi::DataType::FLOAT32})},
+    {"prelu", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
Contributor Author: Also took the chance to register an operator that had been left out previously.

{"prelu_grad",
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
{"prod_raw", XPUKernelSet({phi::DataType::FLOAT32})},
Expand Down
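For readers new to this file, the op list is essentially a map from op name to the set of data types the XPU backend registers for that op. A rough self-contained model of the structure (simplified stand-ins, not the actual phi types):

```cpp
#include <map>
#include <set>
#include <string>

// Simplified stand-ins for phi::DataType and XPUKernelSet.
enum class DataType { FLOAT32, FLOAT16, BFLOAT16 };
using XPUKernelSet = std::set<DataType>;
using XPUOpMap = std::map<std::string, XPUKernelSet>;

// Mirrors the shape of get_kl3_ops(): op name -> supported dtypes.
XPUOpMap& get_kl3_ops_model() {
  static XPUOpMap ops{
      {"prelu", XPUKernelSet({DataType::FLOAT32, DataType::FLOAT16})},
      {"prelu_grad", XPUKernelSet({DataType::FLOAT32, DataType::FLOAT16})},
  };
  return ops;
}
```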
8 changes: 4 additions & 4 deletions paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -358,7 +358,7 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
     auto& in1_rows = input1.rows();
     int64_t* in1_rows_data = nullptr;
     xpu::VectorParam<int64_t> in1_rows_vec{
-        in1_rows.data(), static_cast<int>(in1_rows.size()), in1_rows_data};
+        in1_rows.data(), static_cast<int64_t>(in1_rows.size()), in1_rows_data};

     int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
     PADDLE_ENFORCE_EQ(
@@ -373,9 +373,9 @@ struct SelectedRowsAddToTensor<phi::XPUContext, T> {
     auto* in1_data = in1_value.data<T>();
     auto* out_data = input2->data<T>();

-    int h = in1_rows.size();
-    int w = in1_row_numel;
-    const std::vector<int> xshape{h, w};
+    int64_t h = in1_rows.size();
+    int64_t w = in1_row_numel;
+    const std::vector<int64_t> xshape{h, w};

     int r = xpu::scatter<XPUType, int64_t>(
         context.x_context(),
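The width of the cast is the whole point here: in1_rows.size() returns a size_t, and static_cast<int> silently wraps once a selected-rows tensor exceeds 2^31 - 1 rows. A self-contained illustration of the failure mode (plain C++, independent of the XPU API):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // A row count just past the int32 limit.
  std::size_t rows = 2147483648ULL;  // 2^31

  // Implementation-defined before C++20; wraps to -2147483648 on typical targets.
  int narrow = static_cast<int>(rows);
  // Value preserved exactly.
  int64_t wide = static_cast<int64_t>(rows);

  std::cout << "as int:     " << narrow << "\n";
  std::cout << "as int64_t: " << wide << "\n";
  return 0;
}
```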
13 changes: 5 additions & 8 deletions paddle/phi/kernels/funcs/unfold_functor.h
@@ -18,14 +18,11 @@ namespace phi {
 namespace funcs {

 //////// CalcOutputSize Functor ///////
-inline int CalcOutputSize(int input_size,
-                          int filter_size,
-                          int dilation,
-                          int padding1,
-                          int padding2,
-                          int stride) {
-  const int dkernel = dilation * (filter_size - 1) + 1;
-  int output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
+template <typename T = int>
+inline T CalcOutputSize(
+    T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
+  const T dkernel = dilation * (filter_size - 1) + 1;
+  T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
   return input_size == -1 ? -1 : output_size;
 }

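Because the template parameter defaults to int, existing 32-bit call sites compile unchanged, while callers that might overflow opt in with an explicit int64_t argument. A hypothetical call site (the shapes are made up for illustration):

```cpp
#include <cstdint>

// Copy of phi::funcs::CalcOutputSize as changed above.
template <typename T = int>
inline T CalcOutputSize(
    T input_size, T filter_size, T dilation, T padding1, T padding2, T stride) {
  const T dkernel = dilation * (filter_size - 1) + 1;
  T output_size = (input_size + padding1 + padding2 - dkernel) / stride + 1;
  return input_size == -1 ? -1 : output_size;
}

int main() {
  // T deduced as int, exactly as before the change.
  int h32 = CalcOutputSize(224, 3, 1, 1, 1, 1);  // 224

  // Explicit 64-bit instantiation for extents beyond int range.
  int64_t h64 = CalcOutputSize<int64_t>(int64_t{1} << 32, 3, 1, 1, 1, 1);

  return (h32 == 224 && h64 == (int64_t{1} << 32)) ? 0 : 1;
}
```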
44 changes: 21 additions & 23 deletions paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc
@@ -37,9 +37,9 @@ void Conv2dXPUKernelImpl(const Context& ctx,
                          const paddle::optional<DenseTensor>& branch_max,
                          const paddle::optional<DenseTensor>& scale_max,
                          const paddle::optional<DenseTensor>& out_max_in,
-                         const std::vector<int>& paddings,
-                         const std::vector<int>& dilations,
-                         const std::vector<int>& strides,
+                         const std::vector<int>& paddings_,
+                         const std::vector<int>& dilations_,
+                         const std::vector<int>& strides_,
                          const std::string& padding_algorithm,
                          int groups,
                          int act_type,
@@ -52,26 +52,23 @@
   auto input_dims = x.dims();
   auto filter_dims = filter.dims();
   // update paddings and dilations according to padding_algorithm
-  std::vector<int> paddings_vec = paddings;
-  std::vector<int> dilations_vec = dilations;
+  std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
+  std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
+  std::vector<int64_t> strides(strides_.begin(), strides_.end());
   DDim in_data_dims = common::slice_ddim(input_dims, 2, input_dims.size());
   DDim filter_data_dims =
       common::slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
-  phi::UpdatePaddingAndDilation(&paddings_vec,
-                                &dilations_vec,
-                                padding_algorithm,
-                                in_data_dims,
-                                strides,
-                                ksize);
+  std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
+  phi::UpdatePaddingAndDilation(
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);

-  int batch = static_cast<int>(input_dims[0]);
-  int in_c = static_cast<int>(input_dims[1]);
-  int in_h = static_cast<int>(input_dims[2]);
-  int in_w = static_cast<int>(input_dims[3]);
-  int out_c = static_cast<int>(filter_dims[0]);
-  int win_h = static_cast<int>(filter_dims[2]);
-  int win_w = static_cast<int>(filter_dims[3]);
+  int64_t batch = input_dims[0];
+  int64_t in_c = input_dims[1];
+  int64_t in_h = input_dims[2];
+  int64_t in_w = input_dims[3];
+  int64_t out_c = filter_dims[0];
+  int64_t win_h = filter_dims[2];
+  int64_t win_w = filter_dims[3];
   auto* input_data = reinterpret_cast<const XPUTypeX*>(x.data<T_X>());
   const float* input_max_data =
       x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data<float>();
@@ -130,10 +127,11 @@ void Conv2dXPUKernelImpl(const Context& ctx,
       /* int64_t h */ in_h,
       /* int64_t w */ in_w,
       /* int64_t oc */ out_c,
-      /* const std::vector<int>& ksize */ std::vector<int>{win_h, win_w},
-      /* const std::vector<int>& strides */ strides,
-      /* const std::vector<int>& paddings */ paddings_vec,
-      /* const std::vector<int>& dilations */ dilations_vec,
+      /* const std::vector<int64_t>& ksize */
+      std::vector<int64_t>{win_h, win_w},
+      /* const std::vector<int64_t>& strides */ strides,
+      /* const std::vector<int64_t>& paddings */ paddings,
+      /* const std::vector<int64_t>& dilations */ dilations,
       /* int64_t groups */ groups,
       /* const float* in_maxptr */ input_max_data,
       /* const float* filter_maxptr */ filter_max_data,
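The recurring idiom in this kernel (and the files below) is widening a 32-bit attribute vector to int64_t at the kernel boundary with the iterator-range constructor, which converts element-wise and cannot narrow. A minimal standalone sketch of that pattern:

```cpp
#include <cstdint>
#include <vector>

// Attributes arrive from the framework as int; the XPU call expects int64_t.
std::vector<int64_t> WidenToInt64(const std::vector<int>& attrs) {
  // The range constructor converts each element int -> int64_t.
  return std::vector<int64_t>(attrs.begin(), attrs.end());
}

int main() {
  std::vector<int> strides_ = {2, 2};  // 32-bit, as declared in the op signature
  std::vector<int64_t> strides = WidenToInt64(strides_);
  return (strides[0] == 2 && strides[1] == 2) ? 0 : 1;
}
```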
29 changes: 15 additions & 14 deletions paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc
@@ -26,13 +26,13 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
                               const DenseTensor& filter,
                               const DenseTensor& filter_max,
                               const paddle::optional<DenseTensor>& bias,
-                              const std::vector<int>& strides,
-                              const std::vector<int>& paddings,
+                              const std::vector<int>& strides_,
+                              const std::vector<int>& paddings_,
                               const std::vector<int>& output_padding,
                               const IntArray& output_size,
                               const std::string& padding_algorithm,
                               int groups,
-                              const std::vector<int>& dilations,
+                              const std::vector<int>& dilations_,
                               const std::string& data_format,
                               bool has_bias,
                               bool with_act,
@@ -48,17 +48,18 @@ void Conv2dTransposeXPUKernel(const Context& ctx,

   DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size());  // hw
   DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size());
-  std::vector<int> ksize = common::vectorize<int>(filter_data_dims);
-  std::vector<int> paddings_ = paddings;
-  std::vector<int> dilations_ = dilations;
+  std::vector<int64_t> ksize = common::vectorize<int64_t>(filter_data_dims);
+  std::vector<int64_t> strides(strides_.begin(), strides_.end());
+  std::vector<int64_t> paddings(paddings_.begin(), paddings_.end());
+  std::vector<int64_t> dilations(dilations_.begin(), dilations_.end());
   UpdatePaddingAndDilation(
-      &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
+      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);

-  const int batch_size = static_cast<int>(x.dims()[0]);
-  const int img_yc = static_cast<int>(x.dims()[1]);
-  const int img_xc = static_cast<int>(out->dims()[1]);
-  const int img_xh = static_cast<int>(out->dims()[2]);
-  const int img_xw = static_cast<int>(out->dims()[3]);
+  const int64_t batch_size = x.dims()[0];
+  const int64_t img_yc = x.dims()[1];
+  const int64_t img_xc = out->dims()[1];
+  const int64_t img_xh = out->dims()[2];
+  const int64_t img_xw = out->dims()[3];
   auto act = xpu::Activation_t::LINEAR;
   if (with_act) {
     if (act_type == "relu") {
@@ -83,8 +84,8 @@
       img_xc,
       ksize,
       strides,
-      paddings_,
-      dilations_,
+      paddings,
+      dilations,
       groups,
       x_max_data,
       filter_max_data,
6 changes: 3 additions & 3 deletions paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc
@@ -30,9 +30,9 @@ void FastWhereXPUKernel(const Context& ctx,
   auto* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
   auto* y_data = reinterpret_cast<const XPUType*>(y.data<T>());
   auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
-  auto condition_dims = common::vectorize<int>(condition.dims());
-  auto x_dims = common::vectorize<int>(x.dims());
-  auto y_dims = common::vectorize<int>(y.dims());
+  auto condition_dims = common::vectorize<int64_t>(condition.dims());
+  auto x_dims = common::vectorize<int64_t>(x.dims());
+  auto y_dims = common::vectorize<int64_t>(y.dims());
   PADDLE_ENFORCE_EQ(
       x_dims,
       y_dims,
14 changes: 7 additions & 7 deletions paddle/phi/kernels/fusion/xpu/pad2d_xpu_kernel.cc
@@ -19,13 +19,13 @@ namespace fusion {
 template <typename T, typename Context>
 void Pad2dXPUKernel(const Context& dev_ctx,
                     const DenseTensor& x,
-                    const std::vector<int>& paddings,
+                    const std::vector<int>& paddings_,
                     const std::string& mode,
                     float pad_value,
                     const std::string& data_format,
                     DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  std::vector<int> pads = paddings;
+  std::vector<int64_t> pads(paddings_.begin(), paddings_.end());

   auto in_dims = x.dims();
   const T* in_data = x.data<T>();
@@ -48,10 +48,10 @@ void Pad2dXPUKernel(const Context& dev_ctx,
   }

   T* out_data = dev_ctx.template Alloc<T>(out);
-  const int num = in_dims[0];   // n
-  int channels = in_dims[1];    // c
-  int in_height = in_dims[2];   // xh
-  int in_width = in_dims[3];    // xw
+  const int64_t num = in_dims[0];  // n
+  int64_t channels = in_dims[1];   // c
+  int64_t in_height = in_dims[2];  // xh
+  int64_t in_width = in_dims[3];   // xw
   if (data_format == "NHWC") {
     in_height = in_dims[1];  // xh
     in_width = in_dims[2];   // xw
@@ -111,7 +111,7 @@ void Pad2dXPUKernel(const Context& dev_ctx,
   }

   // set pad3d's pads to pad2d's pads_xpu
-  std::vector<int> pads_xpu(4);
+  std::vector<int64_t> pads_xpu(4);
   pads_xpu[0] = pads[2];  // pt
   pads_xpu[1] = pads[3];  // pd
   pads_xpu[2] = pads[0];  // pl