Skip to content

Commit a6ae95a

Browse files
authored
Merge branch 'develop' into support_tp_conv
2 parents e8f716d + 91f3ea4 commit a6ae95a

File tree

261 files changed

+18261
-1417
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

261 files changed

+18261
-1417
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ repos:
6262
- repo: https://github.com/astral-sh/ruff-pre-commit
6363
rev: v0.11.11
6464
hooks:
65-
- id: ruff
65+
- id: ruff-check
6666
args: [--fix, --exit-non-zero-on-fix, --no-cache]
6767
# For C++ files
6868
- repo: local

SECURITY.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ These tools include adversarial example evaluation test, pseudo-natural environm
1919
Always load and execute untrusted models inside a sandbox and be sure to know the security impacts.
2020
There are several ways in which a model could become untrusted. PaddlePaddle has enough features to impact on the system. (e.g. `paddle.load` uses [pickle](https://docs.python.org/3/library/pickle.html) implicitly, which may cause malformed models to achieve arbitrary code execution). So we recommend when using the untrusted models, you need to carefully audit it and run PaddlePaddle inside a sandbox.
2121

22+
### Using distributed features
23+
PaddlePaddle offers distributed computing capabilities through the paddle.distributed package. These distributed features are meant for secure, trusted environments only, not for use on public or untrusted networks.
24+
25+
For efficiency, PaddlePaddle Distributed (e.g. RPC) does not use encryption or authentication. Messages are sent in plain text, and connections from any source are accepted. This means if you run a PaddlePaddle Distributed program on your network, anyone who can access that network could send tasks to PaddlePaddle, and those tasks will be executed without any security checks, using the same permissions as the PaddlePaddle process.
26+
2227
## PaddlePaddle Code Security
2328

2429
PaddlePaddle always takes code security seriously. However, due to the complexity of the framework and its dependence on other third-party open source libraries, there may still be some undetected security issues. Therefore, we hope that more security researchers and PaddlePaddle developers can participate in the code security program. We encourage responsible disclosure of security issues, as well as contributing code to improve our vulnerability-finding tools to make PaddlePaddle safer.

cmake/cudnn.cmake

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,24 @@ else()
1010
CACHE PATH "CUDNN ROOT")
1111
endif()
1212

13+
set(TARGET_ARCH "x86_64")
14+
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
15+
set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
16+
endif()
17+
1318
find_path(
1419
CUDNN_INCLUDE_DIR cudnn.h
15-
PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include $ENV{CUDNN_ROOT}
16-
$ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
20+
PATHS ${CUDNN_ROOT}
21+
${CUDNN_ROOT}/include
22+
${CUDNN_ROOT}/include/${TARGET_ARCH}-linux-gnu
23+
$ENV{CUDNN_ROOT}
24+
$ENV{CUDNN_ROOT}/include
25+
${CUDA_TOOLKIT_INCLUDE}
1726
/usr/local/lib/python${PY_VERSION}/dist-packages/nvidia/cudnn/include/
1827
NO_DEFAULT_PATH)
1928

2029
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
2130

22-
set(TARGET_ARCH "x86_64")
23-
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
24-
set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
25-
endif()
26-
2731
list(
2832
APPEND
2933
CUDNN_CHECK_LIBRARY_DIRS

cmake/external/xpu.cmake

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ set(XPU_FFT_LIB_NAME "libcufft.so")
3333
add_compile_definitions(XPUAPI_NOT_INCLUDE_DEPRECATED)
3434

3535
if(NOT DEFINED XPU_XHPC_BASE_DATE)
36-
set(XPU_XHPC_BASE_DATE "dev/20250417")
36+
set(XPU_XHPC_BASE_DATE "dev/20250602")
3737
endif()
38-
set(XPU_XCCL_BASE_VERSION "3.0.2.5") # For XRE5
38+
set(XPU_XCCL_BASE_VERSION "3.0.2.7") # For XRE5
3939
if(NOT DEFINED XPU_XFT_BASE_VERSION)
4040
set(XPU_XFT_BASE_VERSION "20250507/xpu3")
4141
endif()
@@ -95,10 +95,18 @@ if(WITH_XPU_FFT)
9595
set(XPU_FFT_DIR_NAME "xpufft_ubuntu2004-x86_64")
9696
endif()
9797

98-
if(WITH_AARCH64)
99-
set(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
100-
set(XPU_XCCL_DIR_NAME "") # TODO: xccl has no kylin output now.
101-
set(XPU_XFT_DIR_NAME "") # TODO: xft has no kylin output at now.
98+
if(WITH_ARM)
99+
if(WITH_XPU_XRE5)
100+
set(XPU_XRE_DIR_NAME "xre-kylin_v10_server-aarch64-${XPU_XRE_BASE_VERSION}")
101+
# TODO: xccl has no kylin output now. set default value here.
102+
set(XPU_XCCL_DIR_NAME "xccl_Linux_x86_64")
103+
set(XPU_XHPC_DIR_NAME "xhpc-kylinv4_aarch64")
104+
set(XPU_XFT_DIR_NAME "") # TODO: xft has no kylin output at now.
105+
else()
106+
set(XPU_XRE_DIR_NAME "")
107+
set(XPU_XCCL_DIR_NAME "") # TODO: xccl has no kylin output now.
108+
set(XPU_XFT_DIR_NAME "") # TODO: xft has no kylin output at now.
109+
endif()
102110
elseif(WITH_SUNWAY)
103111
set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
104112
set(XPU_XCCL_DIR_NAME "") # TODO: xccl has no deepin output at now.
@@ -349,17 +357,14 @@ if(WITH_XPU_XRE5)
349357
${XPU_XBLAS_LIB}
350358
${XPU_API_LIB}
351359
${XPU_XFA_LIB}
352-
${XPU_XPUDNN_LIB})
360+
${XPU_XPUDNN_LIB}
361+
${XPU_ML_LIB})
353362
else()
354363
target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_API_LIB})
355364
endif()
356365

357366
if(WITH_XPU_BKCL)
358-
if(WITH_XPU_XRE5)
359-
target_link_libraries(xpulib ${XPU_ML_LIB} ${XPU_BKCL_LIB})
360-
else()
361-
target_link_libraries(xpulib ${XPU_BKCL_LIB})
362-
endif()
367+
target_link_libraries(xpulib ${XPU_BKCL_LIB})
363368
endif()
364369

365370
add_dependencies(xpulib ${XPU_PROJECT})

cmake/flags.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ if(NOT WIN32)
152152
-fdata-sections
153153
-Wl
154154
-gc-sections
155-
-Werror
156155
-Wall
157156
-Wextra
158157
-Wno-unused-parameter

paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ OP_SAME_OPERANDS_AND_RESULT(YoloBoxHead)
210210
OP_SAME_OPERANDS_AND_RESULT(StandardGamma)
211211
OP_SAME_OPERANDS_AND_RESULT(MaskedFill)
212212
OP_SAME_OPERANDS_AND_RESULT(MaskedFill_)
213+
OP_SAME_OPERANDS_AND_RESULT(IndexElementwisePut)
214+
OP_SAME_OPERANDS_AND_RESULT(IndexElementwisePut_)
213215

214216
bool ScaleOpInferSymbolicShape(pir::Operation *op,
215217
pir::InferSymbolicShapeContext *infer_context) {

paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ OP_DECLARE_INFER_SYMBOLIC_SHAPE(YoloBoxHead)
207207
OP_DECLARE_INFER_SYMBOLIC_SHAPE(StandardGamma)
208208
OP_DECLARE_INFER_SYMBOLIC_SHAPE(MaskedFill)
209209
OP_DECLARE_INFER_SYMBOLIC_SHAPE(MaskedFill_)
210+
OP_DECLARE_INFER_SYMBOLIC_SHAPE(IndexElementwisePut)
211+
OP_DECLARE_INFER_SYMBOLIC_SHAPE(IndexElementwisePut_)
210212

211213
} // namespace paddle::dialect
212214

paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2314,15 +2314,40 @@ bool NanmedianOpInferSymbolicShape(
23142314
if (mode == "avg") {
23152315
median_shape.emplace_back(2);
23162316
}
2317-
infer_context->SetShapeOrDataForValue(
2318-
op->result(0),
2319-
symbol::ShapeOrDataDimExprs{
2320-
symbol::TensorShapeOrDataDimExprs(out_shape)});
2321-
infer_context->SetShapeOrDataForValue(
2322-
op->result(1),
2323-
symbol::ShapeOrDataDimExprs{
2324-
symbol::TensorShapeOrDataDimExprs(median_shape)});
23252317

2318+
const auto &IsZero = [&](const symbol::DimExpr &dim_expr) {
2319+
if (dim_expr.isa<int64_t>()) {
2320+
return dim_expr.dyn_cast<int64_t>() == static_cast<int64_t>(0);
2321+
}
2322+
return false;
2323+
};
2324+
bool size_0 = false;
2325+
for (size_t i = 0; i < x_shape.size(); i++) {
2326+
if (IsZero(x_shape.at(i))) {
2327+
size_0 = true;
2328+
break;
2329+
}
2330+
}
2331+
if (size_0) {
2332+
std::vector<symbol::DimExpr> x_numel_0_shape = {};
2333+
infer_context->SetShapeOrDataForValue(
2334+
op->result(0),
2335+
symbol::ShapeOrDataDimExprs{
2336+
symbol::TensorShapeOrDataDimExprs(x_numel_0_shape)});
2337+
infer_context->SetShapeOrDataForValue(
2338+
op->result(1),
2339+
symbol::ShapeOrDataDimExprs{
2340+
symbol::TensorShapeOrDataDimExprs(x_numel_0_shape)});
2341+
} else {
2342+
infer_context->SetShapeOrDataForValue(
2343+
op->result(0),
2344+
symbol::ShapeOrDataDimExprs{
2345+
symbol::TensorShapeOrDataDimExprs(out_shape)});
2346+
infer_context->SetShapeOrDataForValue(
2347+
op->result(1),
2348+
symbol::ShapeOrDataDimExprs{
2349+
symbol::TensorShapeOrDataDimExprs(median_shape)});
2350+
}
23262351
return true;
23272352
}
23282353

paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,13 +1003,13 @@ bool WhileOp::InferSymbolicShape(
10031003
auto yield_input_data_opt = yield_input_shape_or_data.data();
10041004
auto input_data_opt =
10051005
infer_context->GetShapeOrDataForValue(body_args[i]).data();
1006-
bool const_data_not_euqal =
1006+
bool const_data_not_equal =
10071007
is_all_const_data(yield_input_data_opt) &&
10081008
(!is_all_const_data(input_data_opt) ||
10091009
is_all_const_data(input_data_opt) &&
10101010
yield_input_data_opt.value() != input_data_opt.value());
10111011
auto result_shape_or_data =
1012-
const_data_not_euqal
1012+
const_data_not_equal
10131013
? symbol::TensorShapeOrDataDimExprs(
10141014
yield_input_shape_or_data.shape(),
10151015
creat_new_data(yield_input_data_opt.value().size()))

paddle/fluid/pir/serialize_deserialize/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ endif()
1313

1414
file(GLOB_RECURSE YAML_PATCH_FILES "*.yaml")
1515
# change pir version when new patches are added
16-
add_definitions(-DDEVELOP_VERSION=0)
17-
add_definitions(-DRELEASE_VERSION=1)
16+
add_definitions(-DDEVELOP_VERSION=2)
17+
add_definitions(-DRELEASE_VERSION=2)
1818
set(TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/patch/template.h.in)
1919
set(PATCH_HEADER ${CMAKE_CURRENT_BINARY_DIR}/patch/patch.h)
2020

paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2307,15 +2307,18 @@ void group_norm_grad(const Tensor& x,
23072307
auto tmp1 = out_grad_data * (x_data - mean_new) * sqrt_var_1;
23082308

23092309
auto scale_grad_tmp = reshape<T>(
2310-
tmp1.sum(reduce_axis_except_channel, scale->dtype(), false), {-1});
2310+
tmp1.sum(reduce_axis_except_channel, x_data.dtype(), false), {-1});
2311+
scale_grad_tmp = ConvertToOrig<T>(scale_grad_tmp, scale->dtype());
2312+
23112313
set_output<T>(scale_grad_tmp, scale_grad);
23122314
}
23132315
}
23142316

23152317
if (bias_grad) {
23162318
if (bias) {
23172319
auto bias_grad_tmp =
2318-
out_grad_data.sum(reduce_axis_except_channel, bias->dtype(), false);
2320+
out_grad_data.sum(reduce_axis_except_channel, x_data.dtype(), false);
2321+
bias_grad_tmp = ConvertToOrig<T>(bias_grad_tmp, bias->dtype());
23192322

23202323
set_output<T>(reshape<T>(bias_grad_tmp, {-1}), bias_grad);
23212324
}

paddle/fluid/pybind/auto_parallel_py.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,10 @@ static void parse_attr(PyObject *obj,
930930
auto attr = CastPyArg2DataType(
931931
obj, infer_spmd_string, static_cast<ssize_t>(arg_pos));
932932
ctx->EmplaceBackAttr(attr);
933+
} else if (PyUnicode_Check(obj)) {
934+
auto attr =
935+
CastPyArg2String(obj, infer_spmd_string, static_cast<ssize_t>(arg_pos));
936+
ctx->EmplaceBackAttr(attr);
933937
} else { // TODO(ljz) support other types
934938
PADDLE_THROW(common::errors::InvalidArgument(
935939
"%s(): argument (position %d) must be "

paddle/fluid/pybind/eager_method.cc

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ typedef SSIZE_T ssize_t;
2929
#include "paddle/fluid/eager/hooks.h"
3030
#include "paddle/fluid/eager/utils.h"
3131
#include "paddle/fluid/framework/convert_utils.h"
32+
#include "paddle/fluid/framework/tensor_util.h"
3233
#include "paddle/fluid/platform/enforce.h"
3334
#include "paddle/fluid/pybind/eager.h"
3435
#include "paddle/fluid/pybind/eager_utils.h"
@@ -1398,6 +1399,61 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
13981399
EAGER_CATCH_AND_THROW_RETURN_NULL
13991400
}
14001401

1402+
static PyObject* tensor_method_set_underline_tensor(TensorObject* self,
1403+
PyObject* args,
1404+
PyObject* kwargs) {
1405+
EAGER_TRY
1406+
auto& value = GetTensorFromArgs("set_tensor", "value", args, 0, false);
1407+
if (!value.defined()) {
1408+
PADDLE_THROW(
1409+
common::errors::Unavailable("The `set_tensor()` method of (Dist)Tensor "
1410+
"get a non initialized src value"));
1411+
} else if (value.is_dense_tensor()) {
1412+
auto* src_tensor = static_cast<phi::DenseTensor*>(value.impl().get());
1413+
if (self->tensor.is_dense_tensor()) {
1414+
auto* dst_tensor =
1415+
static_cast<phi::DenseTensor*>(self->tensor.impl().get());
1416+
framework::TensorCopy(*src_tensor, dst_tensor->place(), dst_tensor);
1417+
} else {
1418+
PADDLE_THROW(common::errors::Unavailable(
1419+
"The `set_tensor()` method of non DenseTensor get a DenseTensor src "
1420+
"value"));
1421+
}
1422+
1423+
} else if (value.is_dist_tensor()) {
1424+
#ifdef PADDLE_WITH_DISTRIBUTE
1425+
auto* src_tensor =
1426+
static_cast<phi::distributed::DistTensor*>(value.impl().get());
1427+
if (self->tensor.is_dist_tensor()) {
1428+
auto* dst_tensor =
1429+
static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get());
1430+
framework::TensorCopy(*(src_tensor->unsafe_mutable_value()),
1431+
dst_tensor->place(),
1432+
dst_tensor->unsafe_mutable_value());
1433+
1434+
// TensorCopyFrom(dst_tensor->unsafe_mutable_value(),
1435+
// *(src_tensor->unsafe_mutable_value()), dst_tensor->place(), -1);
1436+
} else {
1437+
PADDLE_THROW(
1438+
common::errors::Unavailable("The `set_tensor()` method of non "
1439+
"DistTensor get a DistTensor src value"));
1440+
}
1441+
#else
1442+
PADDLE_THROW(common::errors::Unavailable(
1443+
"The `set_tensor()` method of (Dist)Tensor is not supported in the "
1444+
"current PaddlePaddle, please recompile and installPaddlePaddle "
1445+
"with the option of `WITH_DISTRIBUTE=ON`."));
1446+
#endif
1447+
1448+
} else {
1449+
PADDLE_THROW(common::errors::Unavailable(
1450+
"The `set_tensor()` method of (Dist)Tensor get a non "
1451+
"DenseTensor/DistTensor src value"));
1452+
}
1453+
RETURN_PY_NONE
1454+
EAGER_CATCH_AND_THROW_RETURN_NULL
1455+
}
1456+
14011457
static PyObject* tensor_method_get_underline_selected_rows(TensorObject* self,
14021458
PyObject* args,
14031459
PyObject* kwargs) {
@@ -1930,8 +1986,35 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
19301986
transed_sub_tensor =
19311987
masked_fill__ad_func(transed_sub_tensor, mask_tensor, value_tensor);
19321988
} else {
1989+
#ifdef PADDLE_WITH_CUDA
1990+
// TODO(czy): remove in the future
1991+
if (transed_sub_tensor.is_gpu() && !out_is_view &&
1992+
transed_index.size() == 1 && value_tensor.numel() == 1) {
1993+
transed_index = expand_outplace(transed_index);
1994+
while (transed_index.size() <
1995+
static_cast<size_t>(transed_sub_tensor.dims().size())) {
1996+
transed_index.emplace_back(empty_ad_func(
1997+
{}, transed_index[0].dtype(), transed_index[0].place()));
1998+
}
1999+
2000+
AdvancedIndex ad = AdvancedIndex(transed_sub_tensor, transed_index);
2001+
transed_sub_tensor =
2002+
index_elementwise_put__ad_func(transed_sub_tensor,
2003+
ad.indices,
2004+
value_tensor,
2005+
ad.src_sizes,
2006+
ad.src_strides,
2007+
ad.indexed_sizes,
2008+
ad.indexed_strides);
2009+
2010+
} else {
2011+
transed_sub_tensor = index_put__ad_func(
2012+
transed_sub_tensor, transed_index, value_tensor);
2013+
}
2014+
#else
19332015
transed_sub_tensor =
19342016
index_put__ad_func(transed_sub_tensor, transed_index, value_tensor);
2017+
#endif
19352018
}
19362019
if (out_is_view) {
19372020
// NOTE(zoooo0820): if out_is_view is true, it is a case of
@@ -3643,6 +3726,10 @@ PyMethodDef variable_methods[] = { // NOLINT
36433726
(PyCFunction)(void (*)())tensor_method__get_tensor_from_selected_rows,
36443727
METH_VARARGS | METH_KEYWORDS,
36453728
nullptr},
3729+
{"set_tensor",
3730+
(PyCFunction)(void (*)())tensor_method_set_underline_tensor,
3731+
METH_VARARGS | METH_KEYWORDS,
3732+
nullptr},
36463733
{"_getitem_dygraph",
36473734
(PyCFunction)(void (*)())tensor__getitem_dygraph,
36483735
METH_VARARGS | METH_KEYWORDS,

paddle/fluid/pybind/pybind.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,6 +1265,19 @@ PYBIND11_MODULE(libpaddle, m) {
12651265
platform::BeginCUDAGraphCapture(
12661266
place, static_cast<paddle::gpuStreamCaptureMode>(mode));
12671267
})
1268+
.def_static(
1269+
"begin_capture_with_pool_id",
1270+
[](phi::GPUPlace place, int mode, std::optional<int64_t> pool_id) {
1271+
if (pool_id.has_value()) {
1272+
platform::BeginCUDAGraphCapture(
1273+
place,
1274+
static_cast<paddle::gpuStreamCaptureMode>(mode),
1275+
pool_id.value());
1276+
} else {
1277+
platform::BeginCUDAGraphCapture(
1278+
place, static_cast<paddle::gpuStreamCaptureMode>(mode));
1279+
}
1280+
})
12681281
.def_static("end_capture", &platform::EndCUDAGraphCapture)
12691282
.def_static("gen_new_memory_pool_id",
12701283
&phi::backends::gpu::CUDAGraph::UniqueMemoryPoolID)

0 commit comments

Comments
 (0)