Skip to content

Commit d8b13ca

Browse files
authored
[CI] Add kunlun and change name with XPU (#72068)
1 parent e37b0d8 commit d8b13ca

File tree

6 files changed

+584
-7
lines changed

6 files changed

+584
-7
lines changed

.github/workflows/CI.yml

+5
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,11 @@ jobs:
2424
uses: ./.github/workflows/_SOT.yml
2525
needs: clone
2626

27+
xpu:
  # Calls the reusable Linux-XPU workflow; it is scheduled only after the
  # `clone` job, which it lists as a dependency.
  name: Linux-XPU
  needs: clone
  uses: ./.github/workflows/_Linux-XPU.yml
31+
2732
inference:
2833
name: PR-CI-Inference
2934
uses: ./.github/workflows/_Inference.yml

.github/workflows/_Linux-XPU.yml

+266
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,266 @@
1+
# Reusable workflow (invoked through `workflow_call`) with three jobs:
#   1. check-bypass - asks check-bypass.yml whether the XPU pipeline may be skipped.
#   2. build        - on the Kunlun-CPU runner group, runs `run_setup.sh bdist_wheel`
#                     inside a container with WITH_XPU=ON and uploads the results.
#   3. test         - on the Kunlun runner group, downloads the uploaded build and
#                     runs `kunlun_test.sh` inside a container.
name: Linux-XPU

on:
  workflow_call:

env:
  dockerfile: dockerfile
  # Image used for both the build and the test container. Quoted so that an
  # image ID can never be re-typed by the YAML parser.
  docker_image: "aa13dc110ab3"
  PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  # The workspace is bind-mounted at /paddle in the containers, so `ci_scripts`
  # is the in-container path and `ci_scripts_runner` the same directory as
  # seen from the runner host.
  ci_scripts: /paddle/ci
  ci_scripts_runner: ${{ github.workspace }}/ci
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  BRANCH: ${{ github.event.pull_request.base.ref }}
  CI_name: xpu

defaults:
  run:
    # NOTE: with an explicit `shell: bash`, GitHub runs every script with
    # `bash --noprofile --norc -eo pipefail`, i.e. the first failing command
    # aborts the step.
    shell: bash

jobs:
  check-bypass:
    name: Check bypass for XPU
    uses: ./.github/workflows/check-bypass.yml
    with:
      workflow-name: 'xpu'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}

  build:
    name: Build
    needs: check-bypass
    # Only runs in the PaddlePaddle organisation and when the bypass check did
    # not report `can-skip == 'true'`.
    if: ${{ github.repository_owner == 'PaddlePaddle' && needs.check-bypass.outputs.can-skip != 'true' }}
    env:
      # Prefix of the container name created below.
      TASK: paddle-CI-${{ github.event.pull_request.number }}-xpu_build
    runs-on:
      group: Kunlun-CPU

    steps:
      # Fetches the PR source tarball, unpacks it into the workspace and merges
      # the PR's base branch into the local `test` branch.
      - name: Download paddle.tar.gz and update test branch
        run: |
          set -e
          echo "Downloading Paddle.tar.gz"
          wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          tar xf Paddle.tar.gz --strip-components=1
          rm Paddle.tar.gz
          git config --global user.name "PaddleCI"
          git config --global user.email "paddle_ci@example.com"
          git remote add upstream https://github.com/PaddlePaddle/Paddle.git
          # NOTE(review): presumably exports proxy settings needed to reach
          # github.com - confirm against the runner setup.
          source ${{ github.workspace }}/../../../proxy
          git config pull.rebase false
          git checkout test
          echo "Pull upstream develop or target branch"
          git pull upstream $BRANCH --no-edit

      # Starts a detached container and exports its name through the
      # workflow-command env file so that later steps can address it.
      - name: Check docker image and run container
        env:
          WITH_SHARED_PHI: "ON"
          WITH_XPU: "ON"
          COVERALLS_UPLOAD: "OFF"
          WITH_AVX: "OFF"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PADDLE_VERSION: "0.0.0"
          WITH_TESTING: "ON"
          WITH_DISTRIBUTE: "ON"
          PY_VERSION: "3.10"
          XPU_VISIBLE_DEVICES: "0,1"
          # Explicit empty strings instead of bare (null) values.
          CUDA_VERSION: ""
          CUDNN_VERSION: ""
          WITH_XPU_BKCL: "ON"
          WITH_XPU_XRE5: "ON"
          CACHE_DIR: /root/.cache
          CCACHE_DIR: /root/.ccache
          CCACHE_MAXSIZE: 150G
          CCACHE_LIMIT_MULTIPLE: "0.8"
          no_proxy: "bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          home_dir: ${{ github.workspace }}/../../../..
        run: |
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          # NOTE(review): WITH_INFERENCE_API_TEST is forwarded below but is not
          # defined anywhere in this workflow - confirm whether it is needed.
          docker run --privileged --ulimit nofile=102400:102400 -d -t --name ${container_name} \
            -v $home_dir/.cache:/root/.cache \
            -v $home_dir/.ccache:/root/.ccache \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e WITH_SHARED_PHI \
            -e WITH_XPU \
            -e COVERALLS_UPLOAD \
            -e GIT_PR_ID \
            -e PADDLE_VERSION \
            -e WITH_TESTING \
            -e WITH_DISTRIBUTE \
            -e PY_VERSION \
            -e XPU_VISIBLE_DEVICES \
            -e CUDA_VERSION \
            -e CUDNN_VERSION \
            -e WITH_XPU_BKCL \
            -e WITH_XPU_XRE5 \
            -e WITH_INFERENCE_API_TEST \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e CCACHE_MAXSIZE \
            -e CCACHE_LIMIT_MULTIPLE \
            -e ci_scripts \
            -e WITH_AVX \
            -e no_proxy \
            -e GITHUB_API_TOKEN \
            -w /paddle --network host ${docker_image} /bin/bash

      # Runs the build script inside the container and propagates its exit code.
      - name: Run build
        # NOTE(review): `docker exec` below passes no `-e` flags, so these
        # step-level overrides are not visible inside the container, which
        # still sees the values given at `docker run` - confirm the intent.
        env:
          work_dir: ${{ github.workspace }}
          PADDLE_ROOT: ${{ github.workspace }}
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          source ${{ github.workspace }}/../../../proxy
          ulimit -n 102400
          git config --global --add safe.directory ${work_dir}
          git submodule foreach "git config --global --add safe.directory \$toplevel/\$sm_path"
          bash -x ${ci_scripts}/run_setup.sh bdist_wheel
          EXCODE=$?
          exit $EXCODE
          '

      # Packs the whole workspace directory and the wheel and uploads both with
      # BosClient.py, downloading the client first when it is not present.
      - name: Upload build.tar.gz and paddle_whl to bos
        env:
          # NOTE(review): access key / secret key are hard-coded here; consider
          # moving them to repository secrets if they are real credentials.
          AK: paddle
          SK: paddle
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos/BosClient.py
          paddle_whl: paddlepaddle_xpu-0.0.0-cp310-cp310-linux_x86_64.whl
        run: |
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_new.tar.gz https://xly-devops.bj.bcebos.com/home/bos_new.tar.gz --no-check-certificate
            # -p: the directory may already exist even though BosClient.py is missing.
            mkdir -p ${{ env.home_path }}/bos
            tar xf ${{ env.home_path }}/bos_new.tar.gz -C ${{ env.home_path }}/bos
          fi
          cd ..
          # Archives the directory named `Paddle` in the parent of the workspace.
          tar --use-compress-program="pigz" -cpf build.tar.gz Paddle
          # source /home/opt/deck/1.0/etc/bashrc
          python3 ${bos_file} build.tar.gz paddle-github-action/PR/xpu/${{ env.PR_ID }}/${{ env.COMMIT_ID }}
          rm build.tar.gz
          cp ${{ github.workspace }}/dist/$paddle_whl .
          python3 ${bos_file} ${paddle_whl} paddle-github-action/PR/xpu/${{ env.PR_ID }}/${{ env.COMMIT_ID }}
          rm ${paddle_whl}

      - name: Terminate and delete the container
        if: always()
        run: |
          # container_name is only exported once the "run container" step has
          # started; without it there is nothing to clean up.
          if [ -z "${container_name:-}" ]; then
            echo "No container was started; nothing to clean up."
            exit 0
          fi
          # The script runs under `bash -e`, so the wipe and the stop are made
          # best-effort: a failure in either must not skip the final removal.
          docker exec -t ${container_name} /bin/bash -c 'rm -rf * .[^.]*' || true
          docker stop ${container_name} || true
          docker rm -f ${container_name}

  test:
    name: Test
    # Depends on `build`, so it is skipped whenever the build job is skipped.
    needs: build
    env:
      # Prefix of the container name created below.
      TASK: paddle-CI-${{ github.event.pull_request.number }}-xpu_test
    runs-on:
      group: Kunlun

    steps:
      # Empties the workspace and unpacks the archive uploaded by the build job.
      - name: Download build.tar.gz
        run: |
          sudo rm -rf * .[^.]*
          wget -q --no-proxy https://paddle-github-action.bj.bcebos.com/PR/xpu/${PR_ID}/${COMMIT_ID}/build.tar.gz --no-check-certificate
          tar --use-compress-program="pigz" -xpf build.tar.gz --strip-components=1
          rm build.tar.gz

      # Passes the third `/`-separated field of $PWD (the second directory
      # component of the working directory) to determine_kunlun_runner.
      # NOTE(review): that helper lives in ci/utils.sh and presumably exports
      # XPU_CODE_1, XPU_CODE_2 and CUDA_VISIBLE_DEVICES for the next step - confirm.
      - name: Determine the runner
        run: |
          runner_name=`(echo $PWD|awk -F '/' '{print $3}')`
          echo $runner_name
          source ${ci_scripts_runner}/utils.sh
          determine_kunlun_runner ${runner_name}

      # Starts a detached container with two XPU devices attached and exports
      # its name through the workflow-command env file for the later steps.
      - name: Check docker image and run container
        env:
          WITH_XPU: "ON"
          COVERALLS_UPLOAD: "OFF"
          CMAKE_BUILD_TYPE: Release
          WITH_AVX: "OFF"
          GIT_PR_ID: ${{ github.event.pull_request.number }}
          PADDLE_VERSION: "0.0.0"
          WITH_TESTING: "ON"
          WITH_DISTRIBUTE: "ON"
          PY_VERSION: "3.10"
          XPU_VISIBLE_DEVICES: ${{ env.CUDA_VISIBLE_DEVICES }}
          CUDA_VISIBLE_DEVICES: ${{ env.CUDA_VISIBLE_DEVICES }}
          # Explicit empty strings instead of bare (null) values.
          CUDA_VERSION: ""
          CUDNN_VERSION: ""
          WITH_XPU_BKCL: "ON"
          CACHE_DIR: /root/.cache
          CCACHE_DIR: /root/.ccache
          CCACHE_MAXSIZE: 150G
          CCACHE_LIMIT_MULTIPLE: "0.8"
          no_proxy: "bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn"
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          home_dir: ${{ github.workspace }}/../../../..
          FLAGS_use_stride_kernel: "0"
        run: |
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          sudo mkdir -p /run/user/0
          # --shm-size is passed once (it used to be repeated further down).
          docker run --cap-add=SYS_PTRACE --privileged --ulimit nofile=102400 --ulimit core=-1 --shm-size=32g -d -t --name ${container_name} \
            -v /ssd1/cibuild/.cache:/root/.cache \
            -v /ssd1/cibuild/.ccache:/root/.ccache \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            --device ${XPU_CODE_1} \
            --device ${XPU_CODE_2} \
            -e BRANCH \
            -e PR_ID \
            -e COMMIT_ID \
            -e work_dir \
            -e PADDLE_ROOT \
            -e WITH_XPU \
            -e COVERALLS_UPLOAD \
            -e CMAKE_BUILD_TYPE \
            -e GIT_PR_ID \
            -e PADDLE_VERSION \
            -e WITH_TESTING \
            -e WITH_DISTRIBUTE \
            -e PY_VERSION \
            -e XPU_VISIBLE_DEVICES \
            -e CUDA_VISIBLE_DEVICES \
            -e CUDA_VERSION \
            -e CUDNN_VERSION \
            -e WITH_XPU_BKCL \
            -e CACHE_DIR \
            -e CCACHE_DIR \
            -e WITH_INFERENCE_API_TEST \
            -e CCACHE_MAXSIZE \
            -e CCACHE_LIMIT_MULTIPLE \
            -e ci_scripts \
            -e WITH_AVX \
            -e no_proxy \
            -e GITHUB_API_TOKEN \
            -e FLAGS_use_stride_kernel \
            -w /paddle --network host ${docker_image} /bin/bash

      - name: Run test
        run: |
          sudo mkdir -p /run/user/0
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          bash ${ci_scripts}/kunlun_test.sh
          '

      - name: Terminate and delete the container
        if: always()
        run: |
          sudo mkdir -p /run/user/0
          # container_name is only exported once the "run container" step has
          # started; without it there is nothing to clean up.
          if [ -z "${container_name:-}" ]; then
            echo "No container was started; nothing to clean up."
            exit 0
          fi
          # The script runs under `bash -e`, so the wipe and the stop are made
          # best-effort: a failure in either must not skip the final removal.
          docker exec -t ${container_name} /bin/bash -c 'rm -rf * .[^.]*' || true
          docker stop ${container_name} || true
          docker rm -f ${container_name}

.github/workflows/re-run.yml

+30
Original file line number | Diff line number | Diff line change
@@ -118,3 +118,33 @@ jobs:
118118
OWNER: ${{ github.repository_owner }}
119119
REPO: ${{ github.event.repository.name }}
120120
JOB_NAME: 'PR-CI-Inference / Test'
121+
122+
- name: Rerun XPU
  # Selected when the comment body contains 'xpu' but neither 'build' nor
  # 'test'; re-runs the job named in JOB_NAME below.
  if: >-
    ${{ contains(env.comment_body, 'xpu')
    && !contains(env.comment_body, 'build')
    && !contains(env.comment_body, 'test') }}
  uses: ./.github/actions/rerun-workflow
  with:
    OWNER: ${{ github.repository_owner }}
    REPO: ${{ github.event.repository.name }}
    PR_ID: ${{ github.event.issue.number }}
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    JOB_NAME: 'Linux-XPU / Check bypass for XPU / Check bypass'
131+
132+
- name: Rerun XPU build
  # Selected when the comment body contains both 'xpu' and 'build'.
  if: >-
    ${{ contains(env.comment_body, 'xpu')
    && contains(env.comment_body, 'build') }}
  uses: ./.github/actions/rerun-workflow
  with:
    OWNER: ${{ github.repository_owner }}
    REPO: ${{ github.event.repository.name }}
    PR_ID: ${{ github.event.issue.number }}
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    JOB_NAME: 'Linux-XPU / Build'
141+
142+
- name: Rerun XPU test
  # Selected when the comment body contains both 'xpu' and 'test'.
  if: >-
    ${{ contains(env.comment_body, 'xpu')
    && contains(env.comment_body, 'test') }}
  uses: ./.github/actions/rerun-workflow
  with:
    OWNER: ${{ github.repository_owner }}
    REPO: ${{ github.event.repository.name }}
    PR_ID: ${{ github.event.issue.number }}
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    JOB_NAME: 'Linux-XPU / Test'

0 commit comments

Comments
 (0)