From c2ade654bf4d132f40fd8136f0cec5582d01cd73 Mon Sep 17 00:00:00 2001 From: qili93 Date: Tue, 19 Dec 2023 19:05:29 +0800 Subject: [PATCH 1/3] [NPU] fix compile and update to release 2.6 --- .gitmodules | 2 +- Paddle | 2 +- backends/npu/kernels/batch_norm_kernel.cc | 4 ++-- backends/npu/kernels/conv_transpose_kernel.cc | 2 +- backends/npu/kernels/fill_diagonal_tensor_kernel.cc | 6 ++++-- backends/npu/kernels/funcs/string_helper.cc | 4 ++-- backends/npu/kernels/group_norm_kernel.cc | 3 ++- backends/npu/kernels/pool2d_kernel.cc | 4 ++-- backends/npu/runtime/runtime.cc | 12 +++++++++--- 9 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.gitmodules b/.gitmodules index ac8df4bfd..2d86e54f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "Paddle"] path = Paddle url = https://github.com/PaddlePaddle/Paddle.git - branch = develop + branch = release/2.6 diff --git a/Paddle b/Paddle index a98e99793..78c5e680e 160000 --- a/Paddle +++ b/Paddle @@ -1 +1 @@ -Subproject commit a98e9979363ca44142c8ebd1f3896721ac4b59ef +Subproject commit 78c5e680eb2541c1e73f3a27046111b34d2f6047 diff --git a/backends/npu/kernels/batch_norm_kernel.cc b/backends/npu/kernels/batch_norm_kernel.cc index 37807d5a1..451626a53 100644 --- a/backends/npu/kernels/batch_norm_kernel.cc +++ b/backends/npu/kernels/batch_norm_kernel.cc @@ -61,7 +61,7 @@ void BatchNormKernel(const Context& dev_ctx, auto* Bias = bias.get_ptr(); phi::DenseTensor new_scale, new_bias; - const auto data_layout = phi::StringToDataLayout(data_layout_str); + const auto data_layout = common::StringToDataLayout(data_layout_str); int C; if (x_dims.size() == 2) { @@ -308,7 +308,7 @@ void BatchNormGradKernel( auto* Bias = bias.get_ptr(); phi::DenseTensor new_scale, new_bias; - const auto data_layout = phi::StringToDataLayout(data_layout_str); + const auto data_layout = common::StringToDataLayout(data_layout_str); int C; if (x_dims.size() == 2) { diff --git a/backends/npu/kernels/conv_transpose_kernel.cc b/backends/npu/kernels/conv_transpose_kernel.cc index b1cbec7ab..8bc239069 100644 --- a/backends/npu/kernels/conv_transpose_kernel.cc +++ b/backends/npu/kernels/conv_transpose_kernel.cc @@ -119,7 +119,7 @@ void Conv2dTransposeGradKernel(const Context& dev_ctx, auto dilations = dilation; if ((!dx) && (!dfilter)) return; - const phi::DataLayout data_layout = phi::StringToDataLayout(data_format); + const phi::DataLayout data_layout = common::StringToDataLayout(data_format); auto in_dims = x.dims(); auto filter_dims = filter.dims(); diff --git a/backends/npu/kernels/fill_diagonal_tensor_kernel.cc b/backends/npu/kernels/fill_diagonal_tensor_kernel.cc index f11b0045d..9875f9530 100644 --- a/backends/npu/kernels/fill_diagonal_tensor_kernel.cc +++ b/backends/npu/kernels/fill_diagonal_tensor_kernel.cc @@ -40,7 +40,8 @@ void FillDiagonalTensorKernel(const Context &dev_ctx, int64_t new_dims[2], strides[2]; std::vector matdim; matdim.resize(fill_dims[0]); - CalMatDims(out_dims, dim1, dim2, &offset, new_dims, strides, matdim.data()); + phi::CalMatDims( + out_dims, dim1, dim2, &offset, new_dims, strides, matdim.data()); PADDLE_ENFORCE_EQ( new_dims[0], fill_dims[0], @@ -125,7 +126,8 @@ void FillDiagonalTensorGradKernel(const Context &dev_ctx, int64_t new_dims[2], strides[2]; std::vector matdim; matdim.resize(matrows); - CalMatDims(dx_dims, dim1, dim2, &offset, new_dims, strides, matdim.data()); + phi::CalMatDims( + dx_dims, dim1, dim2, &offset, new_dims, strides, matdim.data()); auto size = x_grad->numel(); diff --git a/backends/npu/kernels/funcs/string_helper.cc b/backends/npu/kernels/funcs/string_helper.cc index 3961352d7..a9b2d85a7 100644 --- a/backends/npu/kernels/funcs/string_helper.cc +++ b/backends/npu/kernels/funcs/string_helper.cc @@ -173,8 +173,8 @@ std::string GetPDTensorString(const Context& dev_ctx, log_stream << " - place: " << print_tensor.place() << std::endl; log_stream << " - shape: " << print_tensor.dims().to_str() << std::endl; - log_stream << " - layout: " << phi::DataLayoutToString(print_tensor.layout()) - << std::endl; + log_stream << " - layout: " + << common::DataLayoutToString(print_tensor.layout()) << std::endl; auto dtype = print_tensor.dtype(); log_stream << " - dtype: " << dtype << std::endl; diff --git a/backends/npu/kernels/group_norm_kernel.cc b/backends/npu/kernels/group_norm_kernel.cc index a764a1f2e..299a8733d 100644 --- a/backends/npu/kernels/group_norm_kernel.cc +++ b/backends/npu/kernels/group_norm_kernel.cc @@ -152,7 +152,8 @@ void GroupNormKernel(const Context& dev_ctx, phi::DenseTensor* mean, phi::DenseTensor* variance) { auto x_dims = phi::vectorize(x.dims()); - const phi::DataLayout data_layout_data = phi::StringToDataLayout(data_layout); + const phi::DataLayout data_layout_data = + common::StringToDataLayout(data_layout); if (x_dims.size() > 3) { phi::DenseTensor x_tmp(x); diff --git a/backends/npu/kernels/pool2d_kernel.cc b/backends/npu/kernels/pool2d_kernel.cc index d821ba781..c1588de3c 100644 --- a/backends/npu/kernels/pool2d_kernel.cc +++ b/backends/npu/kernels/pool2d_kernel.cc @@ -164,11 +164,11 @@ void Pool2dKernel(const Context& dev_ctx, // AdaptiveAvgPool2d only support NCHW phi::DenseTensor transformed_input, transformed_output; if (pooling_type == "avg" && channel_last) { - transformed_input.Resize(phi::make_dim( + transformed_input.Resize(common::make_dim( in_x_dims[0], in_x_dims[3], in_x_dims[1], in_x_dims[2])); dev_ctx.template Alloc(&transformed_input); transformed_output.Resize( - phi::make_dim(out_dims[0], out_dims[3], out_dims[1], out_dims[2])); + common::make_dim(out_dims[0], out_dims[3], out_dims[1], out_dims[2])); dev_ctx.template Alloc(&transformed_output); const auto& trans_runner = diff --git a/backends/npu/runtime/runtime.cc b/backends/npu/runtime/runtime.cc index 4e9b189ef..b3ac3cb16 100644 --- a/backends/npu/runtime/runtime.cc +++ b/backends/npu/runtime/runtime.cc @@ -34,7 +34,7 @@ FLAGS_DEFINE_uint64(npu_profiling_dtypes, ACL_PROF_HCCL_TRACE | ACL_PROF_RUNTIME_API, "ACL datatypes to profile"); FLAGS_DEFINE_uint64(npu_profiling_metrics, - static_cast(ACL_AICORE_ARITHMETIC_UTILIZATION), + static_cast(ACL_AICORE_PIPE_UTILIZATION), "AI Core metric to profile"); FLAGS_DEFINE_bool(set_to_1d, true, "set_to_1d"); @@ -199,7 +199,10 @@ aclrtStream SecondaryStream::Get(aclrtStream aicore_stream) { void SecondaryStream::Create(aclrtStream aicore_stream) { RUN_CHECK(aicpu_streams.find(aicore_stream) == aicpu_streams.cend()); aclrtStream aicpu_stream; - ACL_CHECK(aclrtCreateStream(&aicpu_stream)); + ACL_CHECK(aclrtCreateStreamWithConfig( + reinterpret_cast(&aicpu_stream), + 0, + (ACL_STREAM_FAST_LAUNCH | ACL_STREAM_FAST_SYNC))); aicpu_streams[aicore_stream] = aicpu_stream; } @@ -597,7 +600,10 @@ C_Status HostDeallocate(const C_Device device, void *ptr, size_t size) { } C_Status CreateStream(const C_Device device, C_Stream *stream) { - ACL_CHECK(aclrtCreateStream(reinterpret_cast(stream))); + ACL_CHECK(aclrtCreateStreamWithConfig( + reinterpret_cast(stream), + 0, + (ACL_STREAM_FAST_LAUNCH | ACL_STREAM_FAST_SYNC))); LOG_IF(INFO, FLAGS_npu_runtime_debug) << "[RUNTIME] CreateStream: device=" << device->id << ", stream=" << *stream; From dbefd781e8b0f36569294e0de97d21a88c4f03b8 Mon Sep 17 00:00:00 2001 From: qili93 Date: Tue, 19 Dec 2023 21:37:43 +0800 Subject: [PATCH 2/3] fix unit test error --- python/tests/auto_parallel_op_test.py | 1 + 1 file changed, 1 insertion(+) create mode 120000 python/tests/auto_parallel_op_test.py diff --git a/python/tests/auto_parallel_op_test.py b/python/tests/auto_parallel_op_test.py new file mode 120000 index 000000000..2136c36b4 --- /dev/null +++ b/python/tests/auto_parallel_op_test.py @@ -0,0 +1 @@ +../../Paddle/test/legacy_test/auto_parallel_op_test.py \ No newline at end of file From 8e9a88d70e4a1585e56a8e628b01e7af099f65a4 Mon Sep 17 00:00:00 2001 From: qili93 Date: Wed, 20 Dec 2023 10:28:23 +0800 Subject: [PATCH 3/3] [NPU] add disable ut case --- backends/npu/tools/disable_ut_npu | 3 +++ backends/npu/tools/pr_ci_npu.sh | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backends/npu/tools/disable_ut_npu b/backends/npu/tools/disable_ut_npu index 073aeeaab..49a079275 100755 --- a/backends/npu/tools/disable_ut_npu +++ b/backends/npu/tools/disable_ut_npu @@ -9,3 +9,6 @@ test_zero_dim_tensor_npu test_momentum_op_npu test_matmul_op_npu test_linear_op_npu +test_compare_op_npu +test_elementwise_sub_op_npu +test_index_sample_op_npu diff --git a/backends/npu/tools/pr_ci_npu.sh b/backends/npu/tools/pr_ci_npu.sh index 217351158..67a68c341 100644 --- a/backends/npu/tools/pr_ci_npu.sh +++ b/backends/npu/tools/pr_ci_npu.sh @@ -18,7 +18,6 @@ # For Paddle CI #================================================= -set -ex if [ -z ${PADDLE_BRANCH} ]; then PADDLE_BRANCH="develop"