[XPU] fix index's datatype, using int64 instead of int, part 1 (a-f) #72431

Merged: 1 commit merged on May 7, 2025
Changes from all commits
26 changes: 26 additions & 0 deletions paddle/phi/kernels/funcs/norm_utils.h
@@ -46,5 +46,31 @@ inline void ExtractNCWHD(const phi::DDim &dims,
: 1;
}
}

inline void ExtractNCWHD(const phi::DDim &dims,
[Review comment from @cqulilujia (Contributor, Author), Apr 24, 2025: add an int64_t overload of this function.]

const DataLayout &data_layout,
int64_t *N,
int64_t *C,
int64_t *H,
int64_t *W,
int64_t *D) {
*N = dims[0];
if (dims.size() == 2) {
*C = dims[1];
*H = 1;
*W = 1;
*D = 1;
} else {
*C = data_layout == DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
*H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
*W = dims.size() > 3
? (data_layout == DataLayout::kNCHW ? dims[3] : dims[2])
: 1;
*D = dims.size() > 4
? (data_layout == DataLayout::kNCHW ? dims[4] : dims[3])
: 1;
}
}

} // namespace funcs
} // namespace phi
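To illustrate the new overload, here is a minimal sketch of a hypothetical caller; the shape and layout arguments are whatever the calling kernel already has, and only the int64_t output pointers are new:

```cpp
// Minimal sketch of a hypothetical caller of the new int64_t overload.
#include <cstdint>

#include "paddle/phi/kernels/funcs/norm_utils.h"

void ExampleUse(const phi::DDim& dims, const phi::DataLayout& layout) {
  int64_t N = 0, C = 0, H = 0, W = 0, D = 0;
  // Overload resolution picks the new int64_t version because the output
  // pointers are int64_t*; the original int* version is left untouched.
  phi::funcs::ExtractNCWHD(dims, layout, &N, &C, &H, &W, &D);
  // N * C * H * W * D now matches numel() without 32-bit truncation,
  // even for tensors with more than INT_MAX elements.
}
```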
8 changes: 4 additions & 4 deletions paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -144,7 +144,7 @@ int xpu_activation_backward(const Context& dev_ctx,
const XPUType*,
const XPUType*,
XPUType*,
int)> func) {
int64_t)> func) {
/* TODO: relu tanh sigmoid are inplace */
const XPUType* x_data = nullptr;
const XPUType* y_data = nullptr;
@@ -446,9 +446,9 @@ void PowGradKernel(const Context& dev_ctx,
T* x_grad = dx->data<T>();

// check dims: all dims should equal
auto x_dims = common::vectorize<int>(x.dims());
auto dy_dims = common::vectorize<int>(dout.dims());
auto dx_dims = common::vectorize<int>(dx->dims());
auto x_dims = common::vectorize<int64_t>(x.dims());
auto dy_dims = common::vectorize<int64_t>(dout.dims());
auto dx_dims = common::vectorize<int64_t>(dx->dims());
PADDLE_ENFORCE_EQ(x_dims,
dy_dims,
errors::PreconditionNotMet("x_dims should match dy_dims."));
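For reference, a rough sketch of the widened callback type accepted by xpu_activation_backward; xpu::Context and XPUType are replaced with stand-ins here, so this is illustrative rather than the real XPU API:

```cpp
#include <cstdint>
#include <functional>

// Stand-ins for xpu::Context and XPUType, purely for illustration.
struct FakeContext;
using XPUType = float;

// Shape of the callback now accepted by xpu_activation_backward: the
// trailing element count is int64_t instead of int, so lengths above
// INT_MAX are passed through without narrowing.
using BackwardFn = std::function<int(FakeContext*,
                                     const XPUType*,   // x
                                     const XPUType*,   // y
                                     const XPUType*,   // dy
                                     XPUType*,         // dx
                                     int64_t)>;        // element count
```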
10 changes: 5 additions & 5 deletions paddle/phi/kernels/xpu/activation_kernel.cc
@@ -72,7 +72,7 @@ int xpu_activation_func(
const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
std::function<int(xpu::Context*, const XPUType*, XPUType*, int)> func) {
std::function<int(xpu::Context*, const XPUType*, XPUType*, int64_t)> func) {
int r = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
@@ -85,8 +85,8 @@ int xpu_activation_func_with_max_x_y(
const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
std::function<
int(xpu::Context*, const XPUType*, XPUType*, int, const float*, float*)>
std::function<int(
xpu::Context*, const XPUType*, XPUType*, int64_t, const float*, float*)>
func) {
// does not support "const float* max_x, float* max_y" now
int r = func(dev_ctx.x_context(),
@@ -106,7 +106,7 @@ int xpu_activation_1attr_func(const Context& dev_ctx,
std::function<int(xpu::Context*,
const XPUType*,
XPUType*,
int,
int64_t,
float,
const float*,
float*)> func) {
@@ -130,7 +130,7 @@ int xpu_activation_2attr_func(const Context& dev_ctx,
std::function<int(xpu::Context*,
const XPUType*,
XPUType*,
int,
int64_t,
float,
float,
const float*,
2 changes: 1 addition & 1 deletion paddle/phi/kernels/xpu/add_n_kernel.cc
@@ -147,7 +147,7 @@ void AddNArrayKernel(const Context& dev_ctx,
reinterpret_cast<const XPUType*>(out->at(j).data<T>()));

// int sum(Context* ctx, const std::vector<const T*>& x_list, T*
// y, int len);
// y, int64_t len);
int r = xpu::sum(dev_ctx.x_context(),
ptrs,
reinterpret_cast<XPUType*>(out->at(j).data<T>()),
7 changes: 4 additions & 3 deletions paddle/phi/kernels/xpu/affine_channel_grad_kernel.cc
@@ -43,9 +43,10 @@ void AffineChannelGradXPUKernel(const Context& dev_ctx,
const phi::DataLayout layout = common::StringToDataLayout(data_layout);

auto dims = x->dims();
int N = dims[0];
int C = layout == phi::DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
int HxW = x->numel() / N / C;
int64_t N = dims[0];
int64_t C =
(layout == phi::DataLayout::kNCHW) ? dims[1] : dims[dims.size() - 1];
int64_t HxW = x->numel() / N / C;

auto* dy_d = dy->data<T>();
auto* scale_d = scale->data<T>();
7 changes: 4 additions & 3 deletions paddle/phi/kernels/xpu/affine_channel_kernel.cc
@@ -39,9 +39,10 @@ void AffineChannelXPUKernel(const Context& dev_ctx,
const phi::DataLayout layout = common::StringToDataLayout(data_layout);

auto dims = x->dims();
int N = dims[0];
int C = layout == phi::DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
int HxW = x->numel() / N / C;
int64_t N = dims[0];
int64_t C =
layout == phi::DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
int64_t HxW = x->numel() / N / C;

auto* scale_d = scale->data<T>();
auto* bias_d = bias->data<T>();
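The reason for widening N, C, and HxW is easiest to see with a made-up shape; the numbers below are illustrative and not taken from the PR:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical NCHW tensor of shape [1, 4, 46341, 46341]:
  // numel() = 8'589'953'124, which does not fit in a 32-bit int.
  int64_t numel = 1LL * 4 * 46341 * 46341;
  int64_t N = 1, C = 4;
  int64_t HxW = numel / N / C;  // 2'147'488'281 > INT_MAX
  std::cout << "HxW = " << HxW << '\n';
  // With the old `int HxW`, the 64-bit quotient would be narrowed to
  // int on assignment and silently truncated.
  return 0;
}
```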
2 changes: 1 addition & 1 deletion paddle/phi/kernels/xpu/amp_kernel.cc
@@ -67,7 +67,7 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
for (size_t i = 0; i < xs.size(); ++i) {
auto* out = outs[i];
T* out_data = dev_ctx.template Alloc<T>(out);
int num = out->numel();
int64_t num = out->numel();
if (cpu_found_inf_data) {
VLOG(1) << "-- UpdateLossScaling: Find infinite grads. --";
int r = 0;
8 changes: 4 additions & 4 deletions paddle/phi/kernels/xpu/arg_min_max_kernel.cc
@@ -49,7 +49,7 @@ void ArgMaxKernel(const Context& dev_ctx,
dtype));
// TODO(ZHUI): fix dtype of out
DDim x_dims;
int axis_val = axis.to<int>();
int64_t axis_val = axis.to<int64_t>();
if (flatten) {
x_dims = common::make_ddim({x.numel()});
// if flatten, the axis just as 0
@@ -58,7 +58,7 @@
x_dims = x.dims();
if (axis_val < 0) axis_val += x_dims.size();
}
auto xdims_vec = common::vectorize<int>(x_dims);
auto xdims_vec = common::vectorize<int64_t>(x_dims);
if (dtype != DataType::INT32) {
dev_ctx.template Alloc<int64_t>(out);
if (x.dims().size() == 0) {
@@ -130,7 +130,7 @@ void ArgMinKernel(const Context& dev_ctx,
dtype));

DDim x_dims;
int axis_val = axis.to<int>();
int64_t axis_val = axis.to<int64_t>();
if (flatten) {
x_dims = common::make_ddim({x.numel()});
// If flatten, the axis just as 0
@@ -139,7 +139,7 @@
x_dims = x.dims();
if (axis_val < 0) axis_val += x_dims.size();
}
auto xdims_vec = common::vectorize<int>(x_dims);
auto xdims_vec = common::vectorize<int64_t>(x_dims);
if (dtype != DataType::INT32) {
dev_ctx.template Alloc<int64_t>(out);
if (x.dims().size() == 0) {
16 changes: 8 additions & 8 deletions paddle/phi/kernels/xpu/argsort_grad_kernel.cc
@@ -51,15 +51,15 @@ void ArgsortGradKernel(const Context& dev_ctx,
if (axis == -1 || axis + 1 == in_dims.size()) {
is_need_transpose = false;
}
int len_before = common::product(common::slice_ddim(in_dims, 0, axis));
int len_after =
auto len_before = common::product(common::slice_ddim(in_dims, 0, axis));
auto len_after =
common::product(common::slice_ddim(in_dims, axis + 1, in_dims.size()));
int m = len_before * len_after;
int n = in_dims[axis];
int len = m * n;
std::vector<int> permute_vec{0, 2, 1};
std::vector<int> data_shape{len_before, n, len_after};
std::vector<int> data_shape_trans{len_before, len_after, n};
auto m = len_before * len_after;
auto n = in_dims[axis];
auto len = m * n;
std::vector<int64_t> permute_vec{0, 2, 1};
std::vector<int64_t> data_shape{len_before, n, len_after};
std::vector<int64_t> data_shape_trans{len_before, len_after, n};

const int64_t* indices_data = indices.data<int64_t>();
const T* out_grad_data = out_grad.data<T>();
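To show why the argsort shape bookkeeping needs 64-bit products, here is a small self-contained sketch; the input shape is invented and common::product is replaced by a plain loop:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Invented input shape and sort axis.
  std::vector<int64_t> in_dims{8, 70000, 40000};
  int axis = 1;

  // Products of the dims before and after `axis`, kept in 64-bit,
  // mirroring the widened len_before / len_after in the kernel.
  int64_t len_before = 1, len_after = 1;
  for (int i = 0; i < axis; ++i) len_before *= in_dims[i];
  for (size_t i = axis + 1; i < in_dims.size(); ++i) len_after *= in_dims[i];

  int64_t m = len_before * len_after;  // 8 * 40000 = 320000
  int64_t n = in_dims[axis];           // 70000
  int64_t len = m * n;                 // 22'400'000'000 > INT_MAX

  // The transpose metadata is widened the same way in the PR.
  std::vector<int64_t> data_shape{len_before, n, len_after};
  std::vector<int64_t> data_shape_trans{len_before, len_after, n};

  std::cout << "len = " << len << ", shape dims = " << data_shape.size()
            << " / " << data_shape_trans.size() << '\n';
  return 0;
}
```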