Skip to content

revert to 131999233ef997fc8d3f24b27830925b78cf17aa and support musa #64478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,11 @@
path = third_party/cccl
url = https://github.com/NVIDIA/cccl.git
ignore = dirty
[submodule "third_party/cryptopp"]
path = third_party/cryptopp
url = https://github.com/weidai11/cryptopp.git
ignore = dirty
[submodule "third_party/cryptopp-cmake"]
path = third_party/cryptopp-cmake
url = https://github.com/noloader/cryptopp-cmake.git
ignore = dirty
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm

## Installation

### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
### Latest PaddlePaddle Release: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)

Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
Expand Down
4 changes: 2 additions & 2 deletions README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

## 安装

### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
### PaddlePaddle 最新版本: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)

跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)

### 安装最新稳定版本:
```
Expand Down
2 changes: 1 addition & 1 deletion README_ja.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PaddlePaddle は、工業化に対するコミットメントを持つ工業的

## インストール

### PaddlePaddle の最新リリース: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
### PaddlePaddle の最新リリース: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)

私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。
Expand Down
18 changes: 9 additions & 9 deletions cmake/external/cryptopp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

include(ExternalProject)

set(CRYPTOPP_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp)
set(CRYPTOPP_CMAKE_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp-cmake)
set(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp)
set(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp)
set(CRYPTOPP_INCLUDE_DIR
"${CRYPTOPP_INSTALL_DIR}/include"
CACHE PATH "cryptopp include directory." FORCE)
set(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git)
set(CRYPTOPP_TAG CRYPTOPP_8_2_0)

if(WIN32)
Expand Down Expand Up @@ -63,17 +64,16 @@ include_directories(${CRYPTOPP_INCLUDE_DIR})
ExternalProject_Add(
extern_cryptopp
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
GIT_REPOSITORY ${CRYPTOPP_REPOSITORY}
GIT_TAG ${CRYPTOPP_TAG}
PREFIX ${CRYPTOPP_PREFIX_DIR}
SOURCE_DIR ${CRYPTOPP_SOURCE_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND
COMMAND ${CMAKE_COMMAND} -E remove_directory "<SOURCE_DIR>/cmake/"
COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "<SOURCE_DIR>/cmake"
COMMAND cd "<SOURCE_DIR>/cmake" && git checkout tags/${CRYPTOPP_TAG} -b
${CRYPTOPP_TAG}
COMMAND ${CMAKE_COMMAND} -E copy_directory "<SOURCE_DIR>/cmake/"
"<SOURCE_DIR>/"
COMMAND ${CMAKE_COMMAND} -E copy "${CRYPTOPP_CMAKE_SOURCE_DIR}/CMakeLists.txt"
"<SOURCE_DIR>/CMakeLists.txt"
COMMAND
${CMAKE_COMMAND} -E copy
"${CRYPTOPP_CMAKE_SOURCE_DIR}/cryptopp-config.cmake"
"<SOURCE_DIR>/cryptopp-config.cmake"
COMMAND ${CRYPTOPP_PATCH_COMMAND}
INSTALL_DIR ${CRYPTOPP_INSTALL_DIR}
CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS}
Expand Down
10 changes: 4 additions & 6 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -882,12 +882,6 @@ function(hip_library TARGET_NAME)
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
if(hip_library_SRCS)
# FindHIP.cmake defines hip_add_library; HIP_SOURCE_PROPERTY_FORMAT is required if no .cu files are found
if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators"
OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels"))
set_source_files_properties(${hip_library_SRCS}
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
if(hip_library_SHARED OR hip_library_shared) # build *.so
hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS})
else()
Expand All @@ -901,6 +895,10 @@ function(hip_library TARGET_NAME)
endif()
# cpplint code style
foreach(source_file ${hip_library_SRCS})
if(NOT ${source_file} MATCHES "\\.cu$")
set_source_files_properties(${source_file}
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND hip_library_HEADERS
Expand Down
15 changes: 10 additions & 5 deletions cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,16 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR})

set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")

# Select the file pattern for the paddle "common" library: "libcommon.*" by
# default, "common.*" (no "lib" prefix) when building on Windows.
set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/libcommon.*)
if(WIN32)
  set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/common.*)
endif()

# Place the matching files in the inference package's paddle/lib directory as
# part of the inference_lib_dist target.
copy(
  inference_lib_dist
  SRCS ${paddle_common_lib}
  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)

if(WIN32)
if(WITH_STATIC_LIB)
set(paddle_inference_lib
Expand Down Expand Up @@ -268,11 +278,6 @@ else()
SRCS ${paddle_phi_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()
set(paddle_common_lib ${PADDLE_BINARY_DIR}/paddle/common/libcommon.*)
copy(
inference_lib_dist
SRCS ${paddle_common_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()

copy(
Expand Down
9 changes: 8 additions & 1 deletion paddle/cinn/ir/ir_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,23 @@ class Dim;
macro__(Product) \
macro__(Sum) \
macro__(PrimitiveNode) \
macro__(IntrinsicOp) \
macro__(_BufferRange_) \
macro__(ScheduleBlock) \
macro__(ScheduleBlockRealize) \
macro__(_Dim_) \

// Node-type list containing only IntrinsicOp. It is kept apart from the other
// lists so that it can be included or left out independently.
#define NODETY_CONTROL_OP_FOR_INTRINSIC(macro__) \
macro__(IntrinsicOp) \

// Applies __m to the primitive-type, op, intrinsic and control-op node lists.
#define NODETY_FORALL(__m) \
NODETY_PRIMITIVE_TYPE_FOR_EACH(__m) \
NODETY_OP_FOR_EACH(__m) \
NODETY_CONTROL_OP_FOR_INTRINSIC(__m) \
NODETY_CONTROL_OP_FOR_EACH(__m)

// Same as NODETY_FORALL but without the IntrinsicOp list, for users that
// handle IntrinsicOp themselves.
#define NODETY_FORALL_EXCEPT_INTRINSIC(__m) \
NODETY_PRIMITIVE_TYPE_FOR_EACH(__m) \
NODETY_OP_FOR_EACH(__m) \
NODETY_CONTROL_OP_FOR_EACH(__m)
// clang-format on

Expand Down
67 changes: 66 additions & 1 deletion paddle/cinn/ir/utils/ir_nodes_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include "paddle/cinn/ir/utils/ir_nodes_collector.h"
#include <glog/logging.h>

#include "paddle/cinn/ir/intrinsic_ops.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_mutator.h"
#include "paddle/cinn/ir/ir_printer.h"

Expand Down Expand Up @@ -71,8 +73,71 @@ struct IrNodesCollector : public IRVisitorRequireReImpl<void> {
} \
}

NODETY_FORALL(__m)
NODETY_FORALL_EXCEPT_INTRINSIC(__m)
#undef __m

// Dispatches an IntrinsicOp to the Visit overload for its concrete intrinsic
// type, chosen by op->getKind().
void Visit(const ir::IntrinsicOp* op) {
switch (op->getKind()) {
// Expanded once per entry of INTRINSIC_KIND_FOR_EACH: casts op to
// ir::intrinsics::x and forwards it to the matching Visit overload.
#define __(x) \
case ir::IntrinsicKind::k##x: \
Visit(llvm::dyn_cast<ir::intrinsics::x>(op)); \
break;

INTRINSIC_KIND_FOR_EACH(__)
#undef __
}
}

// Visits the `data` expression of a GetAddr intrinsic when it is defined.
void Visit(const ir::intrinsics::GetAddr* x) {
  if (!x->data.defined()) return;
  Visit(&x->data);
}

// Visits the `buffer` expression of a BufferGetDataHandle intrinsic when it is
// defined.
void Visit(const ir::intrinsics::BufferGetDataHandle* x) {
  if (!x->buffer.defined()) return;
  Visit(&x->buffer);
}

// Visits the `buffer` expression of a BufferGetDataConstHandle intrinsic when
// it is defined.
void Visit(const ir::intrinsics::BufferGetDataConstHandle* x) {
  if (!x->buffer.defined()) return;
  Visit(&x->buffer);
}

// Visits the `pod_value_ptr` expression of a PodValueToX intrinsic when it is
// defined.
void Visit(const ir::intrinsics::PodValueToX* x) {
  if (!x->pod_value_ptr.defined()) return;
  Visit(&x->pod_value_ptr);
}

// Visits the `buffer` expression of a BufferCreate intrinsic when it is
// defined.
void Visit(const ir::intrinsics::BufferCreate* x) {
  if (!x->buffer.defined()) return;
  Visit(&x->buffer);
}

// Visits `var` (wrapped in a temporary Expr) when it is defined, then every
// defined entry of `args` in order.
void Visit(const ir::intrinsics::ArgsConstruct* x) {
  if (x->var.defined()) {
    Expr var_expr(x->var);
    Visit(&var_expr);
  }
  for (const auto& arg : x->args) {
    if (arg.defined()) {
      Visit(&arg);
    }
  }
}

// Visits every defined entry of `args` of a BuiltinIntrin intrinsic, in order.
void Visit(const ir::intrinsics::BuiltinIntrin* x) {
  for (const auto& arg : x->args) {
    if (!arg.defined()) continue;
    Visit(&arg);
  }
}

std::set<void*> visited_;
};

Expand Down
14 changes: 11 additions & 3 deletions paddle/fluid/distributed/common/chunk_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once
#include <glog/logging.h>
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace distributed {
Expand Down Expand Up @@ -77,9 +78,16 @@ class ChunkAllocator {

void create_new_chunk() {
Chunk* chunk;
posix_memalign(reinterpret_cast<void**>(&chunk),
std::max<size_t>(sizeof(void*), alignof(Chunk)),
sizeof(Chunk) + sizeof(Node) * _chunk_size);
size_t alloc_size = sizeof(Chunk) + sizeof(Node) * _chunk_size;
int error = posix_memalign(reinterpret_cast<void**>(&chunk),
std::max<size_t>(sizeof(void*), alignof(Chunk)),
alloc_size);
PADDLE_ENFORCE_EQ(error,
0,
paddle::platform::errors::ResourceExhausted(
"Fail to alloc memory of %ld size, error code is %d.",
alloc_size,
error));
chunk->next = _chunks;
_chunks = chunk;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x,
// Type promotion Logic
if (phi::NeedTypePromotion(x.dtype(), y.dtype())) {
VLOG(5) << "got different data type, run type protmotion automatically.";
LOG(WARNING) << "got different data type, run type protmotion "
"automatically, this may cause data type been changed.";
LOG_FIRST_N(WARNING, 1)
<< "got different data type, run type protmotion "
"automatically, this may cause data type been changed.";
auto op_name = phi::TransToFluidOpName("multiply");
auto promotion_type = phi::GetPromoteDtype(op_name, x.dtype(), y.dtype());

Expand Down Expand Up @@ -407,8 +408,9 @@ paddle::Tensor multiply_ad_func(const paddle::Tensor& x,
// Type promotion Logic
if (phi::NeedTypePromotion(x.dtype(), y.dtype())) {
VLOG(5) << "got different data type, run type protmotion automatically.";
LOG(WARNING) << "got different data type, run type protmotion "
"automatically, this may cause data type been changed.";
LOG_FIRST_N(WARNING, 1)
<< "got different data type, run type protmotion "
"automatically, this may cause data type been changed.";
auto op_name = phi::TransToFluidOpName("multiply");
auto promotion_type = phi::GetPromoteDtype(op_name, x.dtype(), y.dtype());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ class {} : public egr::GradNodeBase {{

TYPE_PROMOTION_LOGIC_TEMPLATE = """ if (phi::NeedTypePromotion({x}.dtype(), {y}.dtype())) {{
VLOG(5) << "got different data type, run type protmotion automatically.";
LOG(WARNING) << "got different data type, run type protmotion automatically, this may cause data type been changed.";
LOG_FIRST_N(WARNING, 1) << "got different data type, run type protmotion automatically, this may cause data type been changed.";
{op_name}
auto promotion_type = phi::GetPromoteDtype(op_name, {x}.dtype(), {y}.dtype());

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(ir_targets GLOBAL PROPERTY IR_TARGETS)
get_property(not_infer_modules GLOBAL PROPERTY NOT_INFER_MODULES)
set(utils_modules pretty_log string_helper benchmark utf8proc)
set(utils_modules pretty_log string_helper utf8proc)

if(NOT WITH_GFLAGS)
set(utils_modules ${utils_modules} paddle_flags)
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ void AnalysisConfig::EnableXpu(int l3_size,
bool transformer_encoder_adaptive_seqlen,
bool enable_multi_stream) {
#if defined(PADDLE_WITH_XPU) || defined(LITE_SUBGRAPH_WITH_XPU)
LOG_FIRST_N(WARNING, 1)
<< "Parameters in EnableXpu/enable_xpu is deprecated since version "
"2.6.1, and will be removed in version 3.0! Please use "
"EnableXpu/enable_xpu without parameters, and use "
"SetXpuConfig/set_xpu_config to set options.";
use_xpu_ = true;
xpu_config_.l3_size = l3_size;
xpu_config_.conv_autotune_level = conv_autotune;
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ struct PD_INFER_DECL XpuConfig {
bool conv_autotune_file_writeback{false};

// Fc autotune level. The Optional values are 0-9. Default 0 means no
// autotune.
int fc_autotune_level{0};
// Base fc autotune info is read from fc_autotune_file.
std::string fc_autotune_file;
Expand Down Expand Up @@ -367,7 +368,7 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableXpu(int l3_size = 0xfffc00,
bool l3_locked = false,
bool conv_autotune = true,
bool conv_autotune = false,
const std::string& conv_autotune_file = "",
const std::string& transformer_encoder_precision = "int16",
bool transformer_encoder_adaptive_seqlen = false,
Expand Down
8 changes: 7 additions & 1 deletion paddle/fluid/inference/tensorrt/op_teller.cc
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct SimpleOpTypeSetTeller : public Teller {
#endif
#if IS_TRT_VERSION_GE(7000)
teller_set.insert("tile");
int8_teller_set.insert("tile");
teller_set.insert("flatten_contiguous_range");
int8_teller_set.insert("flatten_contiguous_range");
teller_set.insert("rnn");
Expand Down Expand Up @@ -2302,15 +2303,20 @@ struct SimpleOpTypeSetTeller : public Teller {
if (!with_dynamic_shape) {
if (tile_inputs.find("repeat_times_tensor") != tile_inputs.end()) {
if (!desc.Input("repeat_times_tensor").empty()) {
VLOG(3) << "Tile op: repeat_times_tensor is not empty.";
return false;
}
}
if (tile_inputs.find("RepeatTimes") != tile_inputs.end()) {
if (!desc.Input("RepeatTimes").empty()) {
VLOG(3) << "Tile op: RepeatTimes is not empty.";
return false;
}
}
if (!desc.HasAttr("repeat_times")) return false;
if (!desc.HasAttr("repeat_times")) {
VLOG(3) << "Tile op:`repeat_times` is not set.";
return false;
}
}
}
#endif
Expand Down
13 changes: 0 additions & 13 deletions paddle/fluid/inference/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
cc_library(
benchmark
SRCS benchmark.cc
DEPS enforce common)
paddle_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
cc_library(
infer_io_utils
SRCS io_utils.cc
Expand All @@ -13,13 +8,5 @@ cc_library(
DEPS proto_desc enforce common)

cc_library(table_printer SRCS table_printer.cc)
paddle_test(test_table_printer SRCS table_printer_tester.cc)

proto_library(shape_range_info_proto SRCS shape_range_info.proto)

if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some C++ tests on Windows. Since the tests are
# built only in CI, assume the generator on Windows is Ninja.
copy_onnx(test_benchmark)
copy_onnx(test_table_printer)
endif()
Loading