Skip to content

Commit e37b0d8

Browse files
authored
[CI] Add CPU and NPU workflow (#72249)
1 parent 66c4ecb commit e37b0d8

File tree

5 files changed

+391
-3
lines changed

5 files changed

+391
-3
lines changed

.github/workflows/CI.yml

+9-3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ jobs:
3434
uses: ./.github/workflows/_Coverage.yml
3535
needs: clone
3636

37-
# Py3:
38-
# uses: ./.github/workflows/_Py3.yml
39-
# needs: Clone
37+
# Linux CPU job: calls the reusable _CPU.yml workflow once the `clone` job is done.
cpu:
  name: Linux-CPU
  needs: clone
  uses: ./.github/workflows/_CPU.yml
41+
42+
# Linux NPU job: calls the reusable _NPU.yml workflow after the `cpu` job.
# _NPU.yml downloads the wheel from the PR/cpu_whl path that _CPU.yml uploads to.
npu:
  name: Linux-NPU
  needs: cpu
  uses: ./.github/workflows/_NPU.yml

.github/workflows/_CPU.yml

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Reusable workflow: builds the Paddle wheel in a Docker container and runs the
# Linux CPU tests.
name: PR-CI-CPU

# Only runs when another workflow invokes it through `uses:`.
on:
  workflow_call:

# Variables available to every step of this workflow.
env:
  # Pull-request identity.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}
  # Container image and naming.
  dockerfile: Dockerfile.cuda9_cudnn7_gcc48_py35_centos6
  docker_image: '3d2fa88da0da'
  TASK: paddle-CI-${{ github.event.pull_request.number }}-cpu
  CI_name: cpu
  # Paths: /paddle is where the workspace is mounted inside the container.
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  ci_scripts: ${{ github.workspace }}/ci

# All `run:` steps use bash.
defaults:
  run:
    shell: bash
21+
22+
jobs:
23+
build-and-test:
24+
name: Build and test
25+
runs-on:
26+
group: GZ_BD-CPU
27+
28+
steps:
29+
# Starts the detached build container and publishes its name to later steps
# through the GitHub environment file.
- name: Check docker image and run container
  # Build/test switches; each one is forwarded into the container with
  # `docker run -e`. Number-like values are quoted so they stay exact strings.
  env:
    WITH_SHARED_PHI: 'ON'
    WITH_MKL: 'OFF'
    WITH_TESTING: 'ON'
    COVERALLS_UPLOAD: 'OFF'
    GIT_PR_ID: ${{ github.event.pull_request.number }}
    PADDLE_VERSION: '0.0.0'
    WITH_DISTRIBUTE: 'ON'
    PREC_SUFFIX: '.py3'
    WITH_UNITY_BUILD: 'ON'
    PY_VERSION: '3.9'
    PROC_RUN: '12'
    FLAGS_enable_eager_mode: '1'
    WITH_TENSORRT: 'OFF'
    GENERATOR: 'Ninja'
    CCACHE_MAXSIZE: '150G'
    CCACHE_LIMIT_MULTIPLE: '0.8'
    WITH_AVX: 'OFF'
    CCACHE_DIR: '/root/.ccache/cpu'
  # The container name is TASK plus a timestamp; the workspace is mounted at /paddle.
  run: |
    container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
    echo "container_name=${container_name}" >> ${{ github.env }}
    docker run -d -t --name ${container_name} \
      -v "/home/data/cfs:/home/data/cfs" \
      -v "/home/data/cfs/.cache/python35-cpu:/root/.cache" \
      -v "/home/data/cfs/.ccache:/root/.ccache" \
      -v "/dev/shm:/dev/shm" \
      -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
      -v ${{ github.workspace }}:/paddle \
      -e BRANCH \
      -e PR_ID \
      -e COMMIT_ID \
      -e work_dir \
      -e PADDLE_ROOT \
      -e WITH_SHARED_PHI \
      -e WITH_MKL \
      -e WITH_TESTING \
      -e COVERALLS_UPLOAD \
      -e GIT_PR_ID \
      -e PADDLE_VERSION \
      -e WITH_DISTRIBUTE \
      -e PREC_SUFFIX \
      -e WITH_UNITY_BUILD \
      -e PY_VERSION \
      -e PROC_RUN \
      -e FLAGS_enable_eager_mode \
      -e WITH_TENSORRT \
      -e GENERATOR \
      -e CCACHE_MAXSIZE \
      -e CCACHE_LIMIT_MULTIPLE \
      -e GITHUB_ENV \
      -e ci_scripts \
      -e WITH_AVX \
      -e CCACHE_DIR \
      -w /paddle --network host ${docker_image}
85+
86+
# Inside the container: empties the working directory, fetches the PR source
# archive, then merges the base branch into the `test` branch.
- name: Download paddle.tar.gz and merge target branch
  # NOTE(review): this step-level work_dir is not passed to `docker exec`, so the
  # container presumably still sees the value given at `docker run` — confirm.
  env:
    work_dir: ${{ github.workspace }}
  # The cleanup `rm` runs before `set -e`; everything after it aborts on error.
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      rm -rf * .[^.]*
      set -e
      echo "Downloading Paddle.tar.gz"
      wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
      echo "Extracting Paddle.tar.gz"
      tar xf Paddle.tar.gz --strip-components=1
      rm Paddle.tar.gz
      git config --global user.name "PaddleCI"
      git config --global user.email "paddle_ci@example.com"
      git remote add upstream https://github.com/PaddlePaddle/Paddle.git
      source ${{ github.workspace }}/../../../proxy
      git checkout $BRANCH
      git pull upstream $BRANCH
      git fetch upstream $BRANCH
      git checkout test
      git merge --no-edit $BRANCH
    '
108+
109+
# Runs the repository-local check-bypass action for the "cpu" workflow.
# NOTE(review): no other step in this job reads this step's outputs — confirm
# whether the bypass result is meant to gate Build/Test.
- name: Check bypass
  id: check-bypass
  uses: ./.github/actions/check-bypass
  with:
    workflow-name: cpu
    github-token: ${{ secrets.GITHUB_TOKEN }}
115+
116+
# Builds the wheel inside the container with run_setup.sh; the step's result is
# the build script's exit status.
- name: Build
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      source ${{ github.workspace }}/../../../proxy
      bash ${ci_scripts}/run_setup.sh bdist_wheel
      EXCODE=$?
      exit $EXCODE
    '
124+
125+
# Runs the CPU test script inside the container, then cleans build files and
# installs bce-python-sdk. The test script's status is saved first and used as
# the step's exit status, so the follow-up commands cannot mask a test failure.
- name: Test
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      source ${{ github.workspace }}/../../../proxy
      bash ${ci_scripts}/run_linux_cpu_test.sh
      EXCODE=$?
      source ${ci_scripts}/utils.sh; clean_build_files
      echo "::group::Install bce-python-sdk"
      python -m pip install bce-python-sdk==0.8.74
      echo "::endgroup::"
      exit $EXCODE
    '
137+
138+
# Uploads the built wheel from dist/ to
# paddle-github-action/PR/cpu_whl/<PR_ID>/<COMMIT_ID>.
# The BOS client archive is downloaded and unpacked first when BosClient.py is
# not already present next to the workspace.
- name: Upload paddle_whl
  env:
    paddle_whl: paddlepaddle-0.0.0-cp39-cp39-linux_x86_64.whl
    home_path: ${{ github.workspace }}/..
    bos_file: ${{ github.workspace }}/../bos/BosClient.py
  # NOTE(review): AK/SK are literal values in the script — confirm they are
  # placeholders and not credentials that belong in repository secrets.
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      export AK=paddle
      export SK=paddle
      if [ ! -f "${{ env.bos_file }}" ]; then
        wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
        mkdir ${{ env.home_path }}/bos
        tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
      fi
      cd dist
      echo "Uploading paddle_whl to bos"
      python3.9 ${{ env.bos_file }} ${{ env.paddle_whl }} paddle-github-action/PR/cpu_whl/${{ env.PR_ID }}/${{ env.COMMIT_ID }}
    '
156+
157+
# Cleanup that runs whatever the outcome of earlier steps: empties the working
# directory inside the container, then stops and removes the container.
- name: Terminate and delete the container
  if: always()
  # `set +e` keeps going after a failed command so stop/rm are still attempted.
  run: |
    set +e
    docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
    docker stop ${{ env.container_name }}
    docker rm ${{ env.container_name }}

.github/workflows/_NPU.yml

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Reusable workflow: builds and tests PaddleCustomDevice for NPU against the
# pull request's Paddle sources, inside a Docker container.
name: PR-CI-NPU-910B-Paddle

# Only runs when another workflow invokes it through `uses:`.
on:
  workflow_call:

# Variables available to every job and step of this workflow.
env:
  # Pull-request identity.
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.event.pull_request.base.ref }}
  # Container image and naming.
  dockerfile: dockerfile
  docker_image: '885123926a90'
  TASK: paddle-CI-${{ github.event.pull_request.number }}-npu
  CI_name: npu
  # Paths: /paddle is the container mount point; ci_scripts_runner is the
  # runner-side path.
  ci_scripts: /paddle/ci
  ci_scripts_runner: ${{ github.workspace }}/ci
  work_dir: /paddle
  PADDLE_ROOT: /paddle
18+
19+
jobs:
20+
# Calls the reusable check-bypass workflow for the "npu" workflow name.
# The `test` job reads this job's `can-skip` output in its `if:` condition.
check-bypass:
  name: Check bypass for NPU
  uses: ./.github/workflows/check-bypass.yml
  secrets:
    github-token: ${{ secrets.GITHUB_TOKEN }}
  with:
    workflow-name: 'npu'
27+
28+
test:
29+
name: Test
30+
needs: check-bypass
31+
if: ${{ github.repository_owner == 'PaddlePaddle' && needs.check-bypass.outputs.can-skip != 'true' }}
32+
runs-on:
33+
group: NPU
34+
35+
steps:
36+
# On the runner: empties the workspace, fetches and unpacks the PR source
# archive, then pulls the base branch from upstream into the `test` branch.
- name: Download paddle.tar.gz and update test branch
  # The cleanup `rm` runs before `set -e`.
  run: |
    sudo rm -rf * .[^.]*
    source ~/.bashrc
    set -e
    echo "Downloading Paddle.tar.gz"
    wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
    echo "Extracting Paddle.tar.gz"
    tar xf Paddle.tar.gz
    rm Paddle.tar.gz
    cd Paddle
    git remote add upstream https://github.com/PaddlePaddle/Paddle.git
    git config pull.rebase false
    git checkout test
    echo "Pull upstream develop"
    source ${{ github.workspace }}/../../../proxy
    git pull upstream $BRANCH --no-edit
53+
54+
# Takes the third "/"-separated field of the current directory as the runner
# name, passes it to determine_npu_runner (from ci/utils.sh), and exports
# no_proxy to later steps through the GitHub environment file.
- name: Determine the runner
  run: |
    runner_name=$(echo $PWD | awk -F '/' '{print $3}')
    echo $runner_name
    source ${{ github.workspace }}/Paddle/ci/utils.sh
    determine_npu_runner ${runner_name}
    echo no_proxy="localhost,127.0.0.1,localaddress,.bj.bcebos.com,.localdomain.com,.cdn.bcebos.com,.baidu.com,.bcebos.com" >> ${{ github.env }}
61+
62+
# Clones PaddleCustomDevice at the PR's base branch and copies the prepared
# Paddle checkout into it.
- name: Clone PaddleCustomDevice repository
  run: |
    source ~/.bashrc
    git clone --depth=1000 -b ${BRANCH} https://github.com/PaddlePaddle/PaddleCustomDevice.git
    cd PaddleCustomDevice
    cp -r ../Paddle .
68+
69+
# Starts the detached NPU container, with the PaddleCustomDevice checkout
# mounted at /paddle, and publishes the container name to later steps.
- name: Check docker image and run container
  # Settings forwarded into the container with `docker run -e`; home_dir is only
  # used for the runner mount. Number-like values are quoted so they stay exact
  # strings.
  env:
    PADDLE_VERSION: '0.0.0'
    WITH_COVERAGE: 'OFF'
    GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    AGILE_PULL_ID: ${{ github.event.pull_request.number }}
    AGILE_REVISION: ${{ github.event.pull_request.head.sha }}
    GIT_PR_ID: ${{ github.event.pull_request.number }}
    PYTHON_VERSION: '3.9'
    no_proxy: bcebos.com
    USE_910B: '1'
    FLAGS_set_to_1d: 'False'
    NVIDIA_TF32_OVERRIDE: '0'
    paddle_submodule: 'ON'
    MC2: '1'
    HCCL_OP_BASE_FFTS_MODE_ENABLE: 'TRUE'
    PADDLE_XCCL_BACKEND: npu
    HCCL_SOCKET_IFNAME: '=xgbe0'
    FLAGS_eager_communication_connection: '1'
    FLAGS_use_stride_kernel: '0'
    FLAGS_allocator_strategy: naive_best_fit
    FLAGS_npu_storage_format: '0'
    TEST_IMPORTANT: 'ON'
    PADDLE_BRANCH: ${{ github.event.pull_request.base.ref }}
    home_dir: ${{ github.workspace }}/../../../..
  # The container name is TASK plus a timestamp.
  run: |
    echo ${ASCEND_RT_VISIBLE_DEVICES}
    container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
    echo "container_name=${container_name}" >> ${{ github.env }}
    docker run --privileged --pids-limit 409600 --shm-size=128G -d -t \
      --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --name ${container_name} \
      -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
      -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
      -v /usr/local/dcmi:/usr/local/dcmi \
      -v /ssd2/workspace/npu-dev/.cache:/root/.cache \
      -v /ssd2/workspace/npu-dev/.ccache:/root/.ccache \
      -v $home_dir/actions-runner:$home_dir/actions-runner \
      -v ${{ github.workspace }}/PaddleCustomDevice:/paddle \
      -e BRANCH \
      -e PR_ID \
      -e COMMIT_ID \
      -e work_dir \
      -e PADDLE_ROOT \
      -e PADDLE_VERSION \
      -e ASCEND_RT_VISIBLE_DEVICES \
      -e WITH_COVERAGE \
      -e GITHUB_API_TOKEN \
      -e AGILE_PULL_ID \
      -e AGILE_REVISION \
      -e GIT_PR_ID \
      -e PYTHON_VERSION \
      -e no_proxy \
      -e USE_910B \
      -e FLAGS_set_to_1d \
      -e NVIDIA_TF32_OVERRIDE \
      -e paddle_submodule \
      -e MC2 \
      -e HCCL_OP_BASE_FFTS_MODE_ENABLE \
      -e PADDLE_XCCL_BACKEND \
      -e HCCL_SOCKET_IFNAME \
      -e FLAGS_eager_communication_connection \
      -e FLAGS_use_stride_kernel \
      -e FLAGS_allocator_strategy \
      -e FLAGS_npu_storage_format \
      -e TEST_IMPORTANT \
      -e PADDLE_BRANCH \
      -w /paddle --network host ${docker_image} /bin/bash
136+
137+
# Inside the container: installs a libstdc++ package, downloads the CPU wheel
# published under PR/cpu_whl/<PR_ID>/<COMMIT_ID>, points python/pip at the 3.9
# binaries, installs the wheel and prints the Paddle version and commit.
- name: Install Paddle-CPU
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      set -x
      source ~/.bashrc
      wget -q https://sys-p0.bj.bcebos.com/libstdc%2B%2B6_13.1.0-8ubuntu1_20.04.2_amd64.deb
      dpkg -i *deb
      wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/cpu_whl/${PR_ID}/${COMMIT_ID}/paddlepaddle-0.0.0-cp39-cp39-linux_x86_64.whl --no-check-certificate
      PATH=/usr/local/bin:${PATH}
      echo "export PATH=$PATH" >> ~/.bashrc
      ln -sf $(which python3.9) /usr/local/bin/python
      ln -sf $(which pip3.9) /usr/local/bin/pip
      ln -sf $(which python3.9) /usr/bin/python
      ln -sf $(which pip3.9) /usr/bin/pip
      echo "::group::Install Paddle"
      pip install paddlepaddle-*.whl && rm -rf paddlepaddle-*.whl
      python -c "import paddle; print(paddle.__version__)"
      python -c "import paddle; print(paddle.version.commit)"
      echo "::endgroup::"
    '
157+
158+
# Inside the container: installs Python dependencies, loads the Ascend toolkit
# environment, marks the checkouts as safe git directories, then runs the NPU
# CI script.
- name: Build and test
  # The CI script's status is saved in EXCODE and re-raised with `exit`.
  # Without the explicit exit, the last command of the script is the variable
  # assignment, which always succeeds, so the step would pass even when
  # pr_ci_npu.sh fails.
  run: |
    docker exec -t ${{ env.container_name }} /bin/bash -c '
      source ~/.bashrc
      set -x
      echo "::group::Install dependencies"
      python -m pip install PyGithub
      python -m pip install wheel
      echo "::endgroup::"
      source /usr/local/Ascend/ascend-toolkit/set_env.sh
      pip install -U numpy==1.26.4
      git config --global --add safe.directory ${work_dir}
      git config --global --add safe.directory ${work_dir}/Paddle
      cd Paddle
      git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path"
      cd ..
      bash backends/npu/tools/pr_ci_npu.sh; EXCODE=$?
      exit $EXCODE
    '
176+
177+
# Cleanup that runs whatever the outcome of earlier steps: empties the working
# directory inside the container, then stops and removes the container.
- name: Terminate and delete the container
  if: always()
  # `set +e` is required because this workflow does not override the run shell,
  # and GitHub's default bash invocation aborts on the first failing command.
  # Without it, a failed `docker exec` (for example when the container already
  # exited) would skip `docker stop` / `docker rm` and leak the container.
  run: |
    set +e
    docker exec -t ${container_name} /bin/bash -c 'rm -rf * .[^.]*'
    docker stop ${container_name}
    docker rm ${container_name}

.github/workflows/re-run.yml

+21
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@ jobs:
6868
REPO: ${{ github.event.repository.name }}
6969
JOB_NAME: 'PR-CI-SOT / Check bypass for SOT / Check bypass'
7070

71+
72+
# When the comment body contains "cpu", invokes the local rerun-workflow action
# for the job named "Linux-CPU / Build and test".
- name: Rerun CPU
  if: ${{ contains(env.comment_body, 'cpu') }}
  uses: ./.github/actions/rerun-workflow
  with:
    JOB_NAME: 'Linux-CPU / Build and test'
    PR_ID: ${{ github.event.issue.number }}
    OWNER: ${{ github.repository_owner }}
    REPO: ${{ github.event.repository.name }}
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
81+
82+
# When the comment body contains "npu", invokes the local rerun-workflow action
# for the job named "Linux-NPU / Test".
- name: Rerun NPU
  if: ${{ contains(env.comment_body, 'npu') }}
  uses: ./.github/actions/rerun-workflow
  with:
    JOB_NAME: 'Linux-NPU / Test'
    PR_ID: ${{ github.event.issue.number }}
    OWNER: ${{ github.repository_owner }}
    REPO: ${{ github.event.repository.name }}
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
91+
7192
- name: Rerun inference
7293
if: ${{ contains(env.comment_body, 'inference') && !contains(env.comment_body, 'build') && !contains(env.comment_body, 'test') }}
7394
uses: ./.github/actions/rerun-workflow

0 commit comments

Comments
 (0)