Skip to content

Commit 8a7bca5

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fake_dp_mode
2 parents 00ae919 + 8e5d148 commit 8a7bca5

File tree

104 files changed

+1787
-432
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+1787
-432
lines changed

.github/workflows/Build-develop.yml

Lines changed: 0 additions & 178 deletions
This file was deleted.

.github/workflows/_Linux-XPU.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ jobs:
5353
CUDA_VERSION:
5454
CUDNN_VERSION:
5555
WITH_XPU_BKCL: "ON"
56+
WITH_XPU_FFT: "ON"
5657
WITH_XPU_XRE5: "ON"
5758
CACHE_DIR: /root/.cache
5859
CCACHE_DIR: /root/.ccache
5960
CCACHE_MAXSIZE: 150G
6061
CCACHE_LIMIT_MULTIPLE: 0.8
62+
IF_KUNLUN3: "ON"
6163
GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6264
home_dir: ${{ github.workspace }}/../../../..
6365
run: |
@@ -85,6 +87,7 @@ jobs:
8587
-e CUDA_VERSION \
8688
-e CUDNN_VERSION \
8789
-e WITH_XPU_BKCL \
90+
-e WITH_XPU_FFT \
8891
-e WITH_XPU_XRE5 \
8992
-e WITH_INFERENCE_API_TEST \
9093
-e CACHE_DIR \
@@ -93,6 +96,7 @@ jobs:
9396
-e CCACHE_LIMIT_MULTIPLE \
9497
-e ci_scripts \
9598
-e WITH_AVX \
99+
-e IF_KUNLUN3 \
96100
-e no_proxy \
97101
-e GITHUB_API_TOKEN \
98102
-e CI_name \
@@ -212,6 +216,7 @@ jobs:
212216
CCACHE_DIR: /root/.ccache
213217
CCACHE_MAXSIZE: 150G
214218
CCACHE_LIMIT_MULTIPLE: 0.8
219+
IF_KUNLUN3: "ON"
215220
GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
216221
home_dir: ${{ github.workspace }}/../../../..
217222
FLAGS_use_stride_kernel: "0"
@@ -252,6 +257,7 @@ jobs:
252257
-e CCACHE_LIMIT_MULTIPLE \
253258
-e ci_scripts \
254259
-e WITH_AVX \
260+
-e IF_KUNLUN3 \
255261
-e no_proxy \
256262
-e GITHUB_API_TOKEN \
257263
-e FLAGS_use_stride_kernel \

ci/kunlun_test.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,15 +177,12 @@ set +x
177177
echo "Training Resnet50 completed!"
178178

179179
#inference ResNet50
180-
IFS=',' read -ra DEVICES <<< "$CUDA_VISIBLE_DEVICES"
181-
echo ${DEVICES[0]}
182-
183180
echo "Starting to predict ResNet50 model..."
184181
python main.py -c paddlex/configs/modules/image_classification/ResNet50.yaml \
185182
-o Global.mode=predict \
186183
-o Predict.model_dir="./resnet50_output/best_model/inference" \
187184
-o Predict.input="https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg" \
188-
-o Global.device="xpu:${DEVICES[0]}"
185+
-o Global.device="xpu:0"
189186
echo "Predicting Resnet50 completed!"
190187
cd ..
191188
export FLAGS_enable_pir_api=1

ci/run_setup.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,13 @@ function run_setup(){
216216
echo "if you use setup.py to compile,please export envs as following in /paddle ..."
217217
cat << EOF
218218
========================================
219-
export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} WITH_GPU=${WITH_GPU:-OFF} WITH_SHARED_PHI=${WITH_SHARED_PHI:-OFF} WITH_TENSORRT=${WITH_TENSORRT:-ON} WITH_OPENVINO=${WITH_OPENVINO:-OFF} WITH_ROCM=${WITH_ROCM:-OFF} WITH_CINN=${WITH_CINN:-OFF} WITH_DISTRIBUTE=${distributed_flag} WITH_MKL=${WITH_MKL:-ON} WITH_AVX=${WITH_AVX:-OFF} CUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} NEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} NEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} NEW_RELEASE_JIT=${NEW_RELEASE_JIT:-OFF} WITH_PYTHON=${WITH_PYTHON:-ON} CUDNN_ROOT=/usr/ WITH_TESTING=${WITH_TESTING:-ON} WITH_COVERAGE=${WITH_COVERAGE:-OFF} WITH_INCREMENTAL_COVERAGE=${WITH_INCREMENTAL_COVERAGE:-OFF} CMAKE_MODULE_PATH=/opt/rocm/hip/cmake CMAKE_EXPORT_COMPILE_COMMANDS=ON WITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} PY_VERSION=${PY_VERSION:-3.8} CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} WITH_PSCORE=${pscore_flag} WITH_PSLIB=${pslib_flag} WITH_GLOO=${gloo_flag} WITH_XPU=${WITH_XPU:-OFF} WITH_IPU=${WITH_IPU:-OFF} XPU_SDK_ROOT=${XPU_SDK_ROOT:-""} WITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} WITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF} WITH_ARM=${WITH_ARM:-OFF} WITH_STRIP=${WITH_STRIP:-ON} ON_INFER=${ON_INFER:-OFF} WITH_HETERPS=${WITH_HETERPS:-OFF} CUDA_ARCH_BIN=${CUDA_ARCH_BIN} WITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} WITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} WITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF} WITH_CUDNN_FRONTEND=${WITH_CUDNN_FRONTEND:-OFF} WITH_CPP_TEST=${WITH_CPP_TEST:-OFF} FA_BUILD_WITH_CACHE=${FA_BUILD_WITH_CACHE:-ON}
219+
export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} WITH_GPU=${WITH_GPU:-OFF} WITH_SHARED_PHI=${WITH_SHARED_PHI:-OFF} WITH_TENSORRT=${WITH_TENSORRT:-ON} WITH_OPENVINO=${WITH_OPENVINO:-OFF} WITH_ROCM=${WITH_ROCM:-OFF} WITH_CINN=${WITH_CINN:-OFF} WITH_DISTRIBUTE=${distributed_flag} WITH_MKL=${WITH_MKL:-ON} WITH_AVX=${WITH_AVX:-OFF} CUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} NEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} NEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} NEW_RELEASE_JIT=${NEW_RELEASE_JIT:-OFF} WITH_PYTHON=${WITH_PYTHON:-ON} CUDNN_ROOT=/usr/ WITH_TESTING=${WITH_TESTING:-ON} WITH_COVERAGE=${WITH_COVERAGE:-OFF} WITH_INCREMENTAL_COVERAGE=${WITH_INCREMENTAL_COVERAGE:-OFF} CMAKE_MODULE_PATH=/opt/rocm/hip/cmake CMAKE_EXPORT_COMPILE_COMMANDS=ON WITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} PY_VERSION=${PY_VERSION:-3.8} CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} WITH_PSCORE=${pscore_flag} WITH_PSLIB=${pslib_flag} WITH_GLOO=${gloo_flag} WITH_XPU=${WITH_XPU:-OFF} WITH_IPU=${WITH_IPU:-OFF} XPU_SDK_ROOT=${XPU_SDK_ROOT:-""} WITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} WITH_XPU_XHPC=${WITH_XPU_XHPC:-OFF} WITH_XPU_XFT=${WITH_XPU_XFT:-OFF} WITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF} WITH_XPU_FFT=${WITH_XPU_FFT:-OFF} WITH_ARM=${WITH_ARM:-OFF} WITH_STRIP=${WITH_STRIP:-ON} ON_INFER=${ON_INFER:-OFF} WITH_HETERPS=${WITH_HETERPS:-OFF} CUDA_ARCH_BIN=${CUDA_ARCH_BIN} WITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} WITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} WITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF} WITH_CUDNN_FRONTEND=${WITH_CUDNN_FRONTEND:-OFF} WITH_CPP_TEST=${WITH_CPP_TEST:-OFF} FA_BUILD_WITH_CACHE=${FA_BUILD_WITH_CACHE:-ON}
220220
========================================
221221
EOF
222222
echo "if you use cmake to compile,please Configuring cmake in /paddle/build ..."
223223
cat <<EOF
224224
========================================
225-
cmake .. -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_SHARED_PHI=${WITH_SHARED_PHI:-OFF} -DWITH_TENSORRT=${WITH_TENSORRT:-ON} -DWITH_OPENVINO=${WITH_OPENVINO:-OFF} -DWITH_ROCM=${WITH_ROCM:-OFF} -DWITH_CINN=${WITH_CINN:-OFF} -DWITH_DISTRIBUTE=${distributed_flag} -DWITH_MKL=${WITH_MKL:-ON} -DWITH_AVX=${WITH_AVX:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DNEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} -DNEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} -DNEW_RELEASE_JIT=${NEW_RELEASE_JIT:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} -DCUDNN_ROOT=/usr/ -DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_COVERAGE=${WITH_COVERAGE:-OFF} -DWITH_INCREMENTAL_COVERAGE=${WITH_INCREMENTAL_COVERAGE:-OFF} -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DPY_VERSION=${PY_VERSION:-3.8} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} -DWITH_PSCORE=${pscore_flag} -DWITH_PSLIB=${pslib_flag} -DWITH_GLOO=${gloo_flag} -DWITH_XPU=${WITH_XPU:-OFF} -DWITH_IPU=${WITH_IPU:-OFF} -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} -DWITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF} -DWITH_ARM=${WITH_ARM:-OFF} -DWITH_STRIP=${WITH_STRIP:-ON} -DON_INFER=${ON_INFER:-OFF} -DWITH_HETERPS=${WITH_HETERPS:-OFF} -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} -DWITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} -DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF} -DWITH_CUDNN_FRONTEND=${WITH_CUDNN_FRONTEND:-OFF} -DWITH_CPP_TEST=${WITH_CPP_TEST:-OFF} -DFA_BUILD_WITH_CACHE=${FA_BUILD_WITH_CACHE:-ON}
225+
cmake .. -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_SHARED_PHI=${WITH_SHARED_PHI:-OFF} -DWITH_TENSORRT=${WITH_TENSORRT:-ON} -DWITH_OPENVINO=${WITH_OPENVINO:-OFF} -DWITH_ROCM=${WITH_ROCM:-OFF} -DWITH_CINN=${WITH_CINN:-OFF} -DWITH_DISTRIBUTE=${distributed_flag} -DWITH_MKL=${WITH_MKL:-ON} -DWITH_AVX=${WITH_AVX:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DNEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} -DNEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} -DNEW_RELEASE_JIT=${NEW_RELEASE_JIT:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} -DCUDNN_ROOT=/usr/ -DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_COVERAGE=${WITH_COVERAGE:-OFF} -DWITH_INCREMENTAL_COVERAGE=${WITH_INCREMENTAL_COVERAGE:-OFF} -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DPY_VERSION=${PY_VERSION:-3.8} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} -DWITH_PSCORE=${pscore_flag} -DWITH_PSLIB=${pslib_flag} -DWITH_GLOO=${gloo_flag} -DWITH_XPU=${WITH_XPU:-OFF} -DWITH_IPU=${WITH_IPU:-OFF} -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} -DWITH_XPU_XHPC=${WITH_XPU_XHPC:-OFF} -DWITH_XPU_XFT=${WITH_XPU_XFT:-OFF} -DWITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF} -DWITH_XPU_FFT=${WITH_XPU_FFT:-OFF} -DWITH_ARM=${WITH_ARM:-OFF} -DWITH_STRIP=${WITH_STRIP:-ON} -DON_INFER=${ON_INFER:-OFF} -DWITH_HETERPS=${WITH_HETERPS:-OFF} -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} -DWITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} -DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF} -DWITH_CUDNN_FRONTEND=${WITH_CUDNN_FRONTEND:-OFF} -DWITH_CPP_TEST=${WITH_CPP_TEST:-OFF} -DFA_BUILD_WITH_CACHE=${FA_BUILD_WITH_CACHE:-ON}
226226
========================================
227227
EOF
228228
export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
@@ -256,7 +256,10 @@ EOF
256256
export WITH_IPU=${WITH_IPU:-OFF}
257257
export XPU_SDK_ROOT=${XPU_SDK_ROOT:-""}
258258
export WITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
259+
export WITH_XPU_XHPC=${WITH_XPU_XHPC:-OFF}
260+
export WITH_XPU_XFT=${WITH_XPU_XFT:-OFF}
259261
export WITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF}
262+
export WITH_XPU_FFT=${WITH_XPU_FFT:-OFF}
260263
export WITH_ARM=${WITH_ARM:-OFF}
261264
export WITH_STRIP=${WITH_STRIP:-ON}
262265
export ON_INFER=${ON_INFER:-OFF}

ci/utils.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,10 @@ function cmake_base() {
479479
-DWITH_IPU=${WITH_IPU:-OFF}
480480
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
481481
-DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
482+
-DWITH_XPU_XHPC=${WITH_XPU_XHPC:-OFF}
483+
-DWITH_XPU_XFT=${WITH_XPU_XFT:-OFF}
482484
-DWITH_XPU_XRE5=${WITH_XPU_XRE5:-OFF}
485+
-DWITH_XPU_FFT=${WITH_XPU_FFT:-OFF}
483486
-DWITH_ARM=${WITH_ARM:-OFF}
484487
-DWITH_STRIP=${WITH_STRIP:-ON}
485488
-DON_INFER=${ON_INFER:-OFF}
@@ -531,6 +534,9 @@ EOF
531534
-DWITH_IPU=${WITH_IPU:-OFF} \
532535
-DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \
533536
-DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} \
537+
-DWITH_XPU_XHPC=${WITH_XPU_XHPC:-OFF} \
538+
-DWITH_XPU_XFT=${WITH_XPU_XFT:-OFF} \
539+
-DWITH_XPU_FFT=${WITH_XPU_FFT:-OFF} \
534540
-DWITH_ARM=${WITH_ARM:-OFF} \
535541
-DWITH_STRIP=${WITH_STRIP:-ON} \
536542
-DON_INFER=${ON_INFER:-OFF} \

paddle/fluid/eager/backward.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ std::vector<paddle::Tensor> RunBackward(
225225
std::unordered_map<GradNodeBase*, int> node_in_degree_map =
226226
getInDegreeMap(queue);
227227

228+
std::deque<GradNodeBase*> ready_queue;
229+
for (GradNodeBase* item : queue) {
230+
if (!node_in_degree_map.count(item)) {
231+
ready_queue.push_back(item);
232+
}
233+
}
234+
queue = ready_queue;
235+
228236
std::list<GradNodeBase*> force_sequential_nodes_forward_queue =
229237
egr::Controller::Instance().GetForceSequentialNodes();
230238
std::deque<GradNodeBase*> force_sequential_nodes_queue;
@@ -256,10 +264,6 @@ std::vector<paddle::Tensor> RunBackward(
256264
GradNodeBase* node = queue.front();
257265
VLOG(3) << "Preparing GradNode:" << node->name() << " addr:" << node;
258266
try {
259-
if (queue.size() > 1 && node_in_degree_map[node] != 0) {
260-
queue.pop_front();
261-
continue;
262-
}
263267
queue.pop_front();
264268

265269
// Run node: This is where Hook happens

paddle/fluid/pir/dialect/op_generator/op_build_gen.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@
141141
'Pool2DInferMeta',
142142
'ReduceIntArrayAxisInferMetaBase',
143143
'ReduceIntArrayAxisInferMeta',
144+
'StrictReduceIntArrayAxisInferMetaBase',
145+
'StrictReduceIntArrayAxisInferMeta',
144146
'ReshapeInferMeta',
145147
'ReshapeWithXShapeInferMeta',
146148
'ReverseInferMeta',

paddle/fluid/pybind/eager_utils.cc

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,10 +1278,6 @@ paddle::optional<paddle::Tensor> GetOptionalTensorFromArgs(
12781278
const phi::distributed::ProcessMesh* mesh) {
12791279
PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
12801280

1281-
if (PyTuple_Check(obj)) {
1282-
obj = PyTuple_GET_ITEM(obj, 0);
1283-
}
1284-
12851281
if (obj == nullptr || obj == Py_None) {
12861282
if (!dispensable) {
12871283
PADDLE_THROW(common::errors::InvalidArgument(
@@ -1611,10 +1607,6 @@ paddle::Tensor* GetTensorPtrFromArgs(const std::string& op_type,
16111607
bool dispensable) {
16121608
PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
16131609

1614-
if (PyTuple_Check(obj)) {
1615-
obj = PyTuple_GET_ITEM(obj, 0);
1616-
}
1617-
16181610
if (obj == nullptr || obj == Py_None) {
16191611
if (!dispensable) {
16201612
PADDLE_THROW(common::errors::InvalidArgument(

0 commit comments

Comments
 (0)