Skip to content

Commit dddc687

Browse files
authored
revert to 1319992 and support musa (#64478)
* Revert "paddle_musa v2.6.0 release initialization (#64265)" This reverts commit 6caf5d5. * update to v2.6.0 * enable WITH_DISTRIBUTED in CMakeLists.txt and port related source file from cuda to musa * fix some bugs when WITH_DISTRIBUTED is enabled * delete useless cout in ../paddle/phi/backends/gpu/musa/musa_info.cc and set compute capacity to 9.9 for UT * fix some bugs when upgrading to v2.6.1
1 parent 6caf5d5 commit dddc687

File tree

180 files changed

+6280
-1315
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

180 files changed

+6280
-1315
lines changed

.gitmodules

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,11 @@
110110
path = third_party/cccl
111111
url = https://github.com/NVIDIA/cccl.git
112112
ignore = dirty
113+
[submodule "third_party/cryptopp"]
114+
path = third_party/cryptopp
115+
url = https://github.com/weidai11/cryptopp.git
116+
ignore = dirty
117+
[submodule "third_party/cryptopp-cmake"]
118+
path = third_party/cryptopp-cmake
119+
url = https://github.com/noloader/cryptopp-cmake.git
120+
ignore = dirty

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm
2020

2121
## Installation
2222

23-
### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
23+
### Latest PaddlePaddle Release: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
2424

2525
Our vision is to enable deep learning for everyone via PaddlePaddle.
2626
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.

README_cn.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919
## 安装
2020

21-
### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
21+
### PaddlePaddle 最新版本: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
2222

23-
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
23+
跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
2424

2525
### 安装最新稳定版本:
2626
```

README_ja.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PaddlePaddle は、工業化に対するコミットメントを持つ工業的
2020

2121
## インストール
2222

23-
### PaddlePaddle の最新リリース: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
23+
### PaddlePaddle の最新リリース: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
2424

2525
私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
2626
PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。

cmake/external/cryptopp.cmake

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@
1414

1515
include(ExternalProject)
1616

17+
set(CRYPTOPP_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp)
18+
set(CRYPTOPP_CMAKE_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp-cmake)
1719
set(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp)
1820
set(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp)
1921
set(CRYPTOPP_INCLUDE_DIR
2022
"${CRYPTOPP_INSTALL_DIR}/include"
2123
CACHE PATH "cryptopp include directory." FORCE)
22-
set(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git)
2324
set(CRYPTOPP_TAG CRYPTOPP_8_2_0)
2425

2526
if(WIN32)
@@ -63,17 +64,16 @@ include_directories(${CRYPTOPP_INCLUDE_DIR})
6364
ExternalProject_Add(
6465
extern_cryptopp
6566
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
66-
GIT_REPOSITORY ${CRYPTOPP_REPOSITORY}
67-
GIT_TAG ${CRYPTOPP_TAG}
6867
PREFIX ${CRYPTOPP_PREFIX_DIR}
68+
SOURCE_DIR ${CRYPTOPP_SOURCE_DIR}
6969
UPDATE_COMMAND ""
7070
PATCH_COMMAND
71-
COMMAND ${CMAKE_COMMAND} -E remove_directory "<SOURCE_DIR>/cmake/"
72-
COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "<SOURCE_DIR>/cmake"
73-
COMMAND cd "<SOURCE_DIR>/cmake" && git checkout tags/${CRYPTOPP_TAG} -b
74-
${CRYPTOPP_TAG}
75-
COMMAND ${CMAKE_COMMAND} -E copy_directory "<SOURCE_DIR>/cmake/"
76-
"<SOURCE_DIR>/"
71+
COMMAND ${CMAKE_COMMAND} -E copy "${CRYPTOPP_CMAKE_SOURCE_DIR}/CMakeLists.txt"
72+
"<SOURCE_DIR>/CMakeLists.txt"
73+
COMMAND
74+
${CMAKE_COMMAND} -E copy
75+
"${CRYPTOPP_CMAKE_SOURCE_DIR}/cryptopp-config.cmake"
76+
"<SOURCE_DIR>/cryptopp-config.cmake"
7777
COMMAND ${CRYPTOPP_PATCH_COMMAND}
7878
INSTALL_DIR ${CRYPTOPP_INSTALL_DIR}
7979
CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS}

cmake/generic.cmake

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -882,12 +882,6 @@ function(hip_library TARGET_NAME)
882882
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}"
883883
"${multiValueArgs}" ${ARGN})
884884
if(hip_library_SRCS)
885-
# FindHIP.cmake defined hip_add_library, HIP_SOURCE_PROPERTY_FORMAT is requried if no .cu files found
886-
if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators"
887-
OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels"))
888-
set_source_files_properties(${hip_library_SRCS}
889-
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
890-
endif()
891885
if(hip_library_SHARED OR hip_library_shared) # build *.so
892886
hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS})
893887
else()
@@ -901,6 +895,10 @@ function(hip_library TARGET_NAME)
901895
endif()
902896
# cpplint code style
903897
foreach(source_file ${hip_library_SRCS})
898+
if(NOT ${source_file} MATCHES "\\.cu$")
899+
set_source_files_properties(${source_file}
900+
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
901+
endif()
904902
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
905903
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
906904
list(APPEND hip_library_HEADERS

cmake/inference_lib.cmake

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,16 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR})
237237

238238
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
239239

240+
if(WIN32)
241+
set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/common.*)
242+
else()
243+
set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/libcommon.*)
244+
endif()
245+
copy(
246+
inference_lib_dist
247+
SRCS ${paddle_common_lib}
248+
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
249+
240250
if(WIN32)
241251
if(WITH_STATIC_LIB)
242252
set(paddle_inference_lib
@@ -268,11 +278,6 @@ else()
268278
SRCS ${paddle_phi_lib}
269279
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
270280
endif()
271-
set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/libcommon.*)
272-
copy(
273-
inference_lib_dist
274-
SRCS ${paddle_common_lib}
275-
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
276281
endif()
277282

278283
copy(

paddle/cinn/ir/ir_base.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,16 +110,23 @@ class Dim;
110110
macro__(Product) \
111111
macro__(Sum) \
112112
macro__(PrimitiveNode) \
113-
macro__(IntrinsicOp) \
114113
macro__(_BufferRange_) \
115114
macro__(ScheduleBlock) \
116115
macro__(ScheduleBlockRealize) \
117116
macro__(_Dim_) \
118117

118+
#define NODETY_CONTROL_OP_FOR_INTRINSIC(macro__) \
119+
macro__(IntrinsicOp) \
119120

120121
#define NODETY_FORALL(__m) \
121122
NODETY_PRIMITIVE_TYPE_FOR_EACH(__m) \
122123
NODETY_OP_FOR_EACH(__m) \
124+
NODETY_CONTROL_OP_FOR_INTRINSIC(__m) \
125+
NODETY_CONTROL_OP_FOR_EACH(__m)
126+
127+
#define NODETY_FORALL_EXCEPT_INTRINSIC(__m) \
128+
NODETY_PRIMITIVE_TYPE_FOR_EACH(__m) \
129+
NODETY_OP_FOR_EACH(__m) \
123130
NODETY_CONTROL_OP_FOR_EACH(__m)
124131
// clang-format on
125132

paddle/cinn/ir/utils/ir_nodes_collector.cc

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "paddle/cinn/ir/utils/ir_nodes_collector.h"
1616
#include <glog/logging.h>
1717

18+
#include "paddle/cinn/ir/intrinsic_ops.h"
19+
#include "paddle/cinn/ir/ir.h"
1820
#include "paddle/cinn/ir/ir_mutator.h"
1921
#include "paddle/cinn/ir/ir_printer.h"
2022

@@ -71,8 +73,71 @@ struct IrNodesCollector : public IRVisitorRequireReImpl<void> {
7173
} \
7274
}
7375

74-
NODETY_FORALL(__m)
76+
NODETY_FORALL_EXCEPT_INTRINSIC(__m)
7577
#undef __m
78+
79+
void Visit(const ir::IntrinsicOp* op) {
80+
switch (op->getKind()) {
81+
#define __(x) \
82+
case ir::IntrinsicKind::k##x: \
83+
Visit(llvm::dyn_cast<ir::intrinsics::x>(op)); \
84+
break;
85+
86+
INTRINSIC_KIND_FOR_EACH(__)
87+
#undef __
88+
}
89+
}
90+
91+
void Visit(const ir::intrinsics::GetAddr* x) {
92+
if (x->data.defined()) {
93+
Visit(&(x->data));
94+
}
95+
}
96+
97+
void Visit(const ir::intrinsics::BufferGetDataHandle* x) {
98+
if (x->buffer.defined()) {
99+
Visit(&(x->buffer));
100+
}
101+
}
102+
103+
void Visit(const ir::intrinsics::BufferGetDataConstHandle* x) {
104+
if (x->buffer.defined()) {
105+
Visit(&(x->buffer));
106+
}
107+
}
108+
109+
void Visit(const ir::intrinsics::PodValueToX* x) {
110+
if (x->pod_value_ptr.defined()) {
111+
Visit(&(x->pod_value_ptr));
112+
}
113+
}
114+
115+
void Visit(const ir::intrinsics::BufferCreate* x) {
116+
if (x->buffer.defined()) {
117+
Visit(&(x->buffer));
118+
}
119+
}
120+
121+
void Visit(const ir::intrinsics::ArgsConstruct* x) {
122+
if (x->var.defined()) {
123+
Expr convert = Expr(x->var);
124+
Visit(&convert);
125+
}
126+
for (int i = 0; i < x->args.size(); ++i) {
127+
if (x->args[i].defined()) {
128+
Visit(&(x->args[i]));
129+
}
130+
}
131+
}
132+
133+
void Visit(const ir::intrinsics::BuiltinIntrin* x) {
134+
for (int i = 0; i < x->args.size(); ++i) {
135+
if (x->args[i].defined()) {
136+
Visit(&(x->args[i]));
137+
}
138+
}
139+
}
140+
76141
std::set<void*> visited_;
77142
};
78143

paddle/fluid/distributed/common/chunk_allocator.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#pragma once
1616
#include <glog/logging.h>
17+
#include "paddle/fluid/platform/enforce.h"
1718

1819
namespace paddle {
1920
namespace distributed {
@@ -77,9 +78,16 @@ class ChunkAllocator {
7778

7879
void create_new_chunk() {
7980
Chunk* chunk;
80-
posix_memalign(reinterpret_cast<void**>(&chunk),
81-
std::max<size_t>(sizeof(void*), alignof(Chunk)),
82-
sizeof(Chunk) + sizeof(Node) * _chunk_size);
81+
size_t alloc_size = sizeof(Chunk) + sizeof(Node) * _chunk_size;
82+
int error = posix_memalign(reinterpret_cast<void**>(&chunk),
83+
std::max<size_t>(sizeof(void*), alignof(Chunk)),
84+
alloc_size);
85+
PADDLE_ENFORCE_EQ(error,
86+
0,
87+
paddle::platform::errors::ResourceExhausted(
88+
"Fail to alloc memory of %ld size, error code is %d.",
89+
alloc_size,
90+
error));
8391
chunk->next = _chunks;
8492
_chunks = chunk;
8593

paddle/fluid/eager/api/manual/eager_manual/forwards/multiply_fwd_func.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x,
6161
// Type promotion Logic
6262
if (phi::NeedTypePromotion(x.dtype(), y.dtype())) {
6363
VLOG(5) << "got different data type, run type protmotion automatically.";
64-
LOG(WARNING) << "got different data type, run type protmotion "
65-
"automatically, this may cause data type been changed.";
64+
LOG_FIRST_N(WARNING, 1)
65+
<< "got different data type, run type protmotion "
66+
"automatically, this may cause data type been changed.";
6667
auto op_name = phi::TransToFluidOpName("multiply");
6768
auto promotion_type = phi::GetPromoteDtype(op_name, x.dtype(), y.dtype());
6869

@@ -407,8 +408,9 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x,
407408
// Type promotion Logic
408409
if (phi::NeedTypePromotion(x.dtype(), y.dtype())) {
409410
VLOG(5) << "got different data type, run type protmotion automatically.";
410-
LOG(WARNING) << "got different data type, run type protmotion "
411-
"automatically, this may cause data type been changed.";
411+
LOG_FIRST_N(WARNING, 1)
412+
<< "got different data type, run type protmotion "
413+
"automatically, this may cause data type been changed.";
412414
auto op_name = phi::TransToFluidOpName("multiply");
413415
auto promotion_type = phi::GetPromoteDtype(op_name, x.dtype(), y.dtype());
414416

paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ class {} : public egr::GradNodeBase {{
528528

529529
TYPE_PROMOTION_LOGIC_TEMPLATE = """ if (phi::NeedTypePromotion({x}.dtype(), {y}.dtype())) {{
530530
VLOG(5) << "got different data type, run type protmotion automatically.";
531-
LOG(WARNING) << "got different data type, run type protmotion automatically, this may cause data type been changed.";
531+
LOG_FIRST_N(WARNING, 1) << "got different data type, run type protmotion automatically, this may cause data type been changed.";
532532
{op_name}
533533
auto promotion_type = phi::GetPromoteDtype(op_name, {x}.dtype(), {y}.dtype());
534534

paddle/fluid/inference/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
3535
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
3636
get_property(ir_targets GLOBAL PROPERTY IR_TARGETS)
3737
get_property(not_infer_modules GLOBAL PROPERTY NOT_INFER_MODULES)
38-
set(utils_modules pretty_log string_helper benchmark utf8proc)
38+
set(utils_modules pretty_log string_helper utf8proc)
3939

4040
if(NOT WITH_GFLAGS)
4141
set(utils_modules ${utils_modules} paddle_flags)

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ void AnalysisConfig::EnableXpu(int l3_size,
180180
bool transformer_encoder_adaptive_seqlen,
181181
bool enable_multi_stream) {
182182
#if defined(PADDLE_WITH_XPU) || defined(LITE_SUBGRAPH_WITH_XPU)
183+
LOG_FIRST_N(WARNING, 1)
184+
<< "Parameters in EnableXpu/enable_xpu is deprecated since version "
185+
"2.6.1, and will be removed in version 3.0! Please use "
186+
"EnableXpu/enable_xpu without parameters, and use "
187+
"SetXpuConfig/set_xpu_config to set options.";
183188
use_xpu_ = true;
184189
xpu_config_.l3_size = l3_size;
185190
xpu_config_.conv_autotune_level = conv_autotune;

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ struct PD_INFER_DECL XpuConfig {
111111
bool conv_autotune_file_writeback{false};
112112

113113
// Fc autotune level. The Optional values are 0-9. Default 0 means no
114+
// autotune.
114115
int fc_autotune_level{0};
115116
// Base fc autotune info is read from fc_autotune_file.
116117
std::string fc_autotune_file;
@@ -367,7 +368,7 @@ struct PD_INFER_DECL AnalysisConfig {
367368
///
368369
void EnableXpu(int l3_size = 0xfffc00,
369370
bool l3_locked = false,
370-
bool conv_autotune = true,
371+
bool conv_autotune = false,
371372
const std::string& conv_autotune_file = "",
372373
const std::string& transformer_encoder_precision = "int16",
373374
bool transformer_encoder_adaptive_seqlen = false,

paddle/fluid/inference/tensorrt/op_teller.cc

100755100644
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct SimpleOpTypeSetTeller : public Teller {
4747
#endif
4848
#if IS_TRT_VERSION_GE(7000)
4949
teller_set.insert("tile");
50+
int8_teller_set.insert("tile");
5051
teller_set.insert("flatten_contiguous_range");
5152
int8_teller_set.insert("flatten_contiguous_range");
5253
teller_set.insert("rnn");
@@ -2302,15 +2303,20 @@ struct SimpleOpTypeSetTeller : public Teller {
23022303
if (!with_dynamic_shape) {
23032304
if (tile_inputs.find("repeat_times_tensor") != tile_inputs.end()) {
23042305
if (!desc.Input("repeat_times_tensor").empty()) {
2306+
VLOG(3) << "Tile op: repeat_times_tensor is not empty.";
23052307
return false;
23062308
}
23072309
}
23082310
if (tile_inputs.find("RepeatTimes") != tile_inputs.end()) {
23092311
if (!desc.Input("RepeatTimes").empty()) {
2312+
VLOG(3) << "Tile op: RepeatTimes is not empty.";
23102313
return false;
23112314
}
23122315
}
2313-
if (!desc.HasAttr("repeat_times")) return false;
2316+
if (!desc.HasAttr("repeat_times")) {
2317+
VLOG(3) << "Tile op:`repeat_times` is not set.";
2318+
return false;
2319+
}
23142320
}
23152321
}
23162322
#endif
Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
cc_library(
2-
benchmark
3-
SRCS benchmark.cc
4-
DEPS enforce common)
5-
paddle_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
61
cc_library(
72
infer_io_utils
83
SRCS io_utils.cc
@@ -13,13 +8,5 @@ cc_library(
138
DEPS proto_desc enforce common)
149

1510
cc_library(table_printer SRCS table_printer.cc)
16-
paddle_test(test_table_printer SRCS table_printer_tester.cc)
1711

1812
proto_library(shape_range_info_proto SRCS shape_range_info.proto)
19-
20-
if(WITH_ONNXRUNTIME AND WIN32)
21-
# Copy onnxruntime for some c++ test in Windows, since the test will
22-
# be build only in CI, so suppose the generator in Windows is Ninja.
23-
copy_onnx(test_benchmark)
24-
copy_onnx(test_table_printer)
25-
endif()

0 commit comments

Comments
 (0)