|
# Reusable workflow: it declares no triggers of its own and only runs when
# another workflow invokes it through `workflow_call`.
name: PR-CI-NPU-910B-Paddle

on:
  workflow_call:

# Workflow-level environment shared by all jobs and steps in this file.
env:
  # Pull-request identity taken from the triggering event.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}

  # Naming: TASK is the prefix of the container name built in the test job.
  TASK: paddle-CI-${{ github.event.pull_request.number }}-npu
  CI_name: npu

  # Container settings. The image ID is quoted so that a digit-heavy ID can
  # never be re-typed as a number by a YAML parser.
  dockerfile: dockerfile
  docker_image: '885123926a90'

  # Paths. /paddle is the mount point and working directory used for the
  # container in the test job.
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  ci_scripts: /paddle/ci
  ci_scripts_runner: ${{ github.workspace }}/ci
| 18 | + |
jobs:
  # Calls the shared bypass workflow; the `test` job below is skipped when its
  # `can-skip` output is 'true'.
  check-bypass:
    name: Check bypass for NPU
    uses: ./.github/workflows/check-bypass.yml
    with:
      workflow-name: 'npu'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}

  # Builds and tests inside a docker container on a runner from the NPU group.
  test:
    name: Test
    needs: check-bypass
    if: ${{ github.repository_owner == 'PaddlePaddle' && needs.check-bypass.outputs.can-skip != 'true' }}
    runs-on:
      group: NPU

    steps:
      # Wipes the workspace, fetches the pre-packaged source tarball for this
      # PR/commit, and merges the PR base branch into the `test` branch.
      - name: Download paddle.tar.gz and update test branch
        run: |
          sudo rm -rf * .[^.]*
          source ~/.bashrc
          set -e
          echo "Downloading Paddle.tar.gz"
          wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          tar xf Paddle.tar.gz
          rm Paddle.tar.gz
          cd Paddle
          git remote add upstream https://github.com/PaddlePaddle/Paddle.git
          git config pull.rebase false
          git checkout test
          echo "Pull upstream develop"
          source ${{ github.workspace }}/../../../proxy
          git pull upstream $BRANCH --no-edit

      # Derives the runner name from the third path component of $PWD and
      # passes it to determine_npu_runner (defined in ci/utils.sh, not shown
      # here). Also exports no_proxy for the following steps.
      # NOTE(review): presumably determine_npu_runner exports
      # ASCEND_RT_VISIBLE_DEVICES, which a later step reads - confirm.
      - name: Determine the runner
        run: |
          runner_name=`(echo $PWD|awk -F '/' '{print $3}')`
          echo $runner_name
          source ${{ github.workspace }}/Paddle/ci/utils.sh
          determine_npu_runner ${runner_name}
          echo no_proxy="localhost,127.0.0.1,localaddress,.bj.bcebos.com,.localdomain.com,.cdn.bcebos.com,.baidu.com,.bcebos.com" >> ${{ github.env }}

      # Clones PaddleCustomDevice at the PR base branch and copies the Paddle
      # checkout into it, so both trees live under one directory.
      - name: Clone PaddleCustomDevice repository
        run: |
          source ~/.bashrc
          git clone --depth=1000 -b ${BRANCH} https://github.com/PaddlePaddle/PaddleCustomDevice.git
          cd PaddleCustomDevice
          cp -r ../Paddle .

      # Starts a detached container with PaddleCustomDevice mounted at /paddle
      # and records its name in the env file for the remaining steps.
      - name: Check docker image and run container
        env:
          # Version-like values are quoted so they always stay strings.
          PADDLE_VERSION: "0.0.0"
          WITH_COVERAGE: "OFF"
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AGILE_PULL_ID: ${{ github.event.pull_request.number }}
          AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PYTHON_VERSION: "3.9"
          no_proxy: bcebos.com
          USE_910B: 1
          FLAGS_set_to_1d: "False"
          NVIDIA_TF32_OVERRIDE: 0
          paddle_submodule: "ON"
          MC2: 1
          HCCL_OP_BASE_FFTS_MODE_ENABLE: "TRUE"
          PADDLE_XCCL_BACKEND: npu
          # Quoted because the value starts with '='.
          HCCL_SOCKET_IFNAME: "=xgbe0"
          FLAGS_eager_communication_connection: 1
          FLAGS_use_stride_kernel: 0
          FLAGS_allocator_strategy: naive_best_fit
          FLAGS_npu_storage_format: 0
          TEST_IMPORTANT: "ON"
          PADDLE_BRANCH: ${{ github.event.pull_request.base.ref }}
          home_dir: ${{ github.workspace }}/../../../..
        run: |
          echo ${ASCEND_RT_VISIBLE_DEVICES}
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          docker run --privileged --pids-limit 409600 --shm-size=128G -d -t \
            --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --name ${container_name} \
            -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
            -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
            -v /usr/local/dcmi:/usr/local/dcmi \
            -v /ssd2/workspace/npu-dev/.cache:/root/.cache \
            -v /ssd2/workspace/npu-dev/.ccache:/root/.ccache \
            -v $home_dir/actions-runner:$home_dir/actions-runner \
            -v ${{ github.workspace }}/PaddleCustomDevice:/paddle \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e PADDLE_VERSION \
            -e ASCEND_RT_VISIBLE_DEVICES \
            -e WITH_COVERAGE \
            -e GITHUB_API_TOKEN \
            -e AGILE_PULL_ID \
            -e AGILE_REVISION \
            -e GIT_PR_ID \
            -e PYTHON_VERSION \
            -e no_proxy \
            -e USE_910B \
            -e FLAGS_set_to_1d \
            -e NVIDIA_TF32_OVERRIDE \
            -e paddle_submodule \
            -e MC2 \
            -e HCCL_OP_BASE_FFTS_MODE_ENABLE \
            -e PADDLE_XCCL_BACKEND \
            -e HCCL_SOCKET_IFNAME \
            -e FLAGS_eager_communication_connection \
            -e FLAGS_use_stride_kernel \
            -e FLAGS_allocator_strategy \
            -e FLAGS_npu_storage_format \
            -e TEST_IMPORTANT \
            -e PADDLE_BRANCH \
            -w /paddle --network host ${docker_image} /bin/bash

      # Installs the CPU wheel built for this PR/commit inside the container
      # and points `python`/`pip` at the 3.9 interpreter.
      # NOTE(review): the inner script has no `set -e` and ends with an echo,
      # so a failed install does not fail this step - confirm that is intended.
      - name: Install Paddle-CPU
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            set -x
            source ~/.bashrc
            wget -q https://sys-p0.bj.bcebos.com/libstdc%2B%2B6_13.1.0-8ubuntu1_20.04.2_amd64.deb
            dpkg -i *deb
            wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/cpu_whl/${PR_ID}/${COMMIT_ID}/paddlepaddle-0.0.0-cp39-cp39-linux_x86_64.whl --no-check-certificate
            PATH=/usr/local/bin:${PATH}
            echo "export PATH=$PATH" >> ~/.bashrc
            ln -sf $(which python3.9) /usr/local/bin/python
            ln -sf $(which pip3.9) /usr/local/bin/pip
            ln -sf $(which python3.9) /usr/bin/python
            ln -sf $(which pip3.9) /usr/bin/pip
            echo "::group::Install Paddle"
            pip install paddlepaddle-*.whl && rm -rf paddlepaddle-*.whl
            python -c "import paddle; print(paddle.__version__)"
            python -c "import paddle; print(paddle.version.commit)"
            echo "::endgroup::"
          '

      # Runs the NPU CI script inside the container. The script exit code is
      # propagated so that a failing run fails this step; previously the
      # trailing `EXCODE=$?` assignment made the step always succeed.
      - name: Build and test
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
            source ~/.bashrc
            set -x
            echo "::group::Install dependencies"
            python -m pip install PyGithub
            python -m pip install wheel
            echo "::endgroup::"
            source /usr/local/Ascend/ascend-toolkit/set_env.sh
            pip install -U numpy==1.26.4
            git config --global --add safe.directory ${work_dir}
            git config --global --add safe.directory ${work_dir}/Paddle
            cd Paddle
            git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path"
            cd ..
            bash backends/npu/tools/pr_ci_npu.sh;EXCODE=$?
            # Report the CI script result as the result of this step.
            exit $EXCODE
          '

      # Always runs, even after a failure, so the container is not left behind.
      - name: Terminate and delete the container
        if: always()
        run: |
          # An earlier step may have failed before the name was exported.
          if [ -z "${container_name}" ]; then
            echo "container_name is not set; skipping cleanup"
            exit 0
          fi
          # The default shell aborts on the first failing command, so the
          # best-effort commands must not prevent the final removal.
          docker exec -t ${container_name} /bin/bash -c 'rm -rf * .[^.]*' || true
          docker stop ${container_name} || true
          docker rm -f ${container_name}
0 commit comments