Skip to content

Commit 5998ff3

Browse files
[SDAA]Add Tecorigin SDAA backend (PaddlePaddle#1305)
1 parent 2ed814b commit 5998ff3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+6295
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
- [飞桨自定义接入硬件后端(苹果MPS)](backends/mps/README.md)
1919
- [飞桨自定义接入硬件后端(壁仞GPU)](backends/biren_gpu/README_cn.md)
2020
- [飞桨自定义接入硬件后端(燧原GCU)](backends/gcu/README_cn.md)
21+
- [飞桨自定义接入硬件后端(太初SDAA)](backends/sdaa/README_cn.md)
2122

2223
## 版权和许可证
2324

README_en.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ PaddleCustomDevice has supported the following backends:
1818
- [PaddlePaddle Custom Device Implementation for Apple MPS](backends/mps/README.md)
1919
- [PaddlePaddle Custom Device Implementation for Biren GPU](backends/biren_gpu/README.md)
2020
- [PaddlePaddle Custom Device Implementation for Enflame GCU](backends/gcu/README.md)
21+
- [PaddlePaddle Custom Device Implementation for Tecorigin SDAA](backends/sdaa/README.md)
2122

2223
## Copyright and License
2324
PaddleCustomDevice is provided under the [Apache-2.0 license](LICENSE).

README_ja.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ PaddleCustomDevice は以下のバックエンドをサポートしています:
1818
- [Apple MPS 用 PaddlePaddle カスタムデバイス実装](backends/mps/README.md)
1919
- [Biren GPU 用 PaddlePaddle カスタムデバイス実装](backends/biren_gpu/README.md)
2020
- [Enflame GCU 用 PaddlePaddle カスタムデバイス実装](backends/gcu/README.md)
21+
- [Tecorigin SDAA 用 PaddlePaddle カスタムデバイス実装](backends/sdaa/README.md)
2122

2223
## 著作権とライセンス
2324

backends/sdaa/CMakeLists.txt

+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
cmake_minimum_required(VERSION 3.10)

project(paddle-custom-sdaa CXX C)

# Select C++14 through CMake's language-standard variables rather than passing
# a raw -std flag to add_definitions(): that command is meant for preprocessor
# definitions and hands its arguments to every enabled language, including C,
# for which -std=c++14 is not a valid option.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Extensions off keeps the strict c++14 dialect (not gnu++14), matching the
# flag that was previously passed by hand.
set(CMAKE_CXX_EXTENSIONS OFF)

# Let include() find the helper modules kept in the cmake/ directory.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")

# Build switches that can be overridden on the cmake command line.
option(WITH_TESTING "compile with unit testing" ON)
option(NATIVE_SDAA "use native sdaa lib" ON)
option(WITH_MKLDNN "compile with MKLDNN support" ON)
option(WITH_SW "compile with sw arch support" OFF)
option(WITH_GIT_COMMIT "compile with git commit print" ON)
option(WITH_PADDLE_INFO "compile with paddle commit/version print" ON)

# Plugin identity. The version is taken from the PLUGIN_VERSION environment
# variable when it is set at configure time, otherwise it defaults to 0.0.1.
set(PLUGIN_NAME "paddle-custom-sdaa")
if(DEFINED ENV{PLUGIN_VERSION})
  set(PLUGIN_VERSION $ENV{PLUGIN_VERSION})
else()
  set(PLUGIN_VERSION "0.0.1")
endif()
20+
21+
# Project helper modules, looked up through CMAKE_MODULE_PATH.
include(paddle)
include(generic)
include(teco)
include(third_party)

# Header search paths applied to every target defined in this directory.
include_directories(
  ${PADDLE_INC_DIR} ${TECODNN_INC} ${CMAKE_SOURCE_DIR} ${EXTEND_OP_INC}
  ${SDPTI_INC} ${TBLAS_INC} ${TECODNN_CUSTOM_INC} ${TCCL_INC} ${CLANG_INC})

# The native SDAA library contributes both a macro and its own headers.
if(NATIVE_SDAA)
  add_definitions(-DNATIVE_SDAA)
  include_directories(${SDAA_INC})
endif()

# TODO(Aganlengzi): PADDLE_WITH_CUSTOM_KERNEL only avoids a compile error and
# is to be removed.
add_definitions(-DPADDLE_WITH_CUSTOM_DEVICE -DPADDLE_WITH_CUSTOM_KERNEL)

if(WITH_SW)
  add_definitions(-DPADDLE_WITH_SW)
endif()
48+
49+
# Optionally bake the short hash of the checked-out commit into the build as
# the GIT_COMMIT_ID macro.
if(WITH_GIT_COMMIT)
  # Run git from the source tree. Without WORKING_DIRECTORY, execute_process()
  # starts in the build directory, which is not necessarily inside the
  # repository (out-of-tree builds), and the hash lookup then fails silently.
  execute_process(
    COMMAND git rev-parse --short HEAD
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    RESULT_VARIABLE GIT_COMMIT_RESULT
    OUTPUT_VARIABLE GIT_COMMIT_ID
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  if(NOT "${GIT_COMMIT_RESULT}" STREQUAL "0")
    # GIT_COMMIT_ID is left empty in this case, exactly as before; the warning
    # only makes the failure visible at configure time.
    message(WARNING "Unable to query the git commit id; "
                    "GIT_COMMIT_ID will be empty.")
  endif()
  add_definitions(-DGIT_COMMIT_ID=\"${GIT_COMMIT_ID}\")
endif()

# Optionally expose the PaddlePaddle commit and version as macros. The values
# come from get_paddle_info(), which is not defined in this file.
if(WITH_PADDLE_INFO)
  get_paddle_info(PADDLE_COMMIT_ID PADDLE_FULL_VERSION PADDLE_BUILD_ENV_PATH)
  add_definitions(-DPADDLE_COMMIT_ID=\"${PADDLE_COMMIT_ID}\")
  add_definitions(-DPADDLE_FULL_VERSION=\"${PADDLE_FULL_VERSION}\")
endif()
62+
63+
# Gather the plugin sources: every kernels/*.cc file first, then the dynamic
# loader sources, then every runtime/*.cc file. Globbed paths are recorded
# relative to CMAKE_SOURCE_DIR.
file(GLOB_RECURSE PLUGIN_SRCS RELATIVE ${CMAKE_SOURCE_DIR} kernels/*.cc)
file(GLOB_RECURSE PLUGIN_RUNTIME_SRCS RELATIVE ${CMAKE_SOURCE_DIR}
     runtime/*.cc)

set(DYNLOAD_SRCS dynload/sdpti.cc dynload/dynamic_loader.cc)

list(APPEND PLUGIN_SRCS ${DYNLOAD_SRCS} ${PLUGIN_RUNTIME_SRCS})
74+
75+
# version dump
#
# Builds a small helper executable from tools/version/{query,dump}.cc and runs
# it right after it is linked. query.cc is also compiled into the plugin.
set(VERSION_DUMP_TARGET version_dump)
set(PADDLE_SDAA_TOOLS_DIR ${CMAKE_SOURCE_DIR}/tools)
set(VERSION_QUERY_DIR ${PADDLE_SDAA_TOOLS_DIR}/version)
list(APPEND PLUGIN_SRCS ${VERSION_QUERY_DIR}/query.cc)

# Generate minimum_supported_version.h in the build tree; @ONLY restricts
# substitution to @VAR@ placeholders in the template.
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/tools/version/minimum_supported_version.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/tools/version/minimum_supported_version.h @ONLY)

add_executable(${VERSION_DUMP_TARGET} ${VERSION_QUERY_DIR}/query.cc
                                      ${VERSION_QUERY_DIR}/dump.cc)
# The build directory is on the include path so the generated header above can
# be found.
target_include_directories(${VERSION_DUMP_TARGET}
                           PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(${VERSION_DUMP_TARGET} PRIVATE ${TCCL_LIB})
target_link_libraries(${VERSION_DUMP_TARGET} PRIVATE ${SDAA_LIB})
target_link_libraries(${VERSION_DUMP_TARGET} PRIVATE ${TECODNN_LIB})
target_link_libraries(${VERSION_DUMP_TARGET} PRIVATE ${TBLAS_LIB})
target_link_libraries(${VERSION_DUMP_TARGET} PRIVATE ${TECODNN_CUSTOM_LIB})

# Run the helper once it has been built. $<TARGET_FILE:...> resolves to the
# executable's real location, so the step no longer depends on the binary
# happening to sit in the command's working directory. The working directory is
# pinned to the build directory (the previous implicit default) so anything the
# tool writes relative to its cwd lands where it did before.
add_custom_command(
  TARGET ${VERSION_DUMP_TARGET}
  POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E echo "Run version_dump to generate version.py"
  COMMAND $<TARGET_FILE:${VERSION_DUMP_TARGET}>
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  VERBATIM)
98+
99+
# build shared library
add_library(${PLUGIN_NAME} SHARED ${PLUGIN_SRCS})
target_include_directories(${PLUGIN_NAME} PRIVATE ${PADDLE_SDAA_TOOLS_DIR})
# Record a run path relative to the library's own location ($ORIGIN/lib/).
set_target_properties(
  ${PLUGIN_NAME} PROPERTIES LINK_FLAGS
                            "-Wl,-rpath,$ORIGIN/lib/,--enable-new-dtags")

# link third_party
add_dependencies(${PLUGIN_NAME} third_party)
target_link_libraries(${PLUGIN_NAME} PRIVATE gflags glog)

# sdaa: the native runtime library is linked only when NATIVE_SDAA is enabled;
# the remaining libraries are always linked, in this order.
if(NATIVE_SDAA)
  target_link_libraries(${PLUGIN_NAME} PRIVATE ${SDAA_LIB})
endif()
target_link_libraries(
  ${PLUGIN_NAME}
  PRIVATE ${EXTEND_OP_LIB}
          ${TECODNN_LIB}
          ${PADDLE_CORE_LIB}
          ${TBLAS_LIB}
          stdc++fs
          ${TCCL_LIB}
          ${TECODNN_CUSTOM_LIB})
121+
122+
# testing: when enabled, define the CUSTOM_DEVICE_ROOT=<dir> assignment string
# pointing at the staged python package, then descend into tests/.
if(WITH_TESTING)
  set(TEST_CUSTOM_DEVICE_ROOT
      CUSTOM_DEVICE_ROOT=${CMAKE_BINARY_DIR}/python/paddle_custom_device)

  add_subdirectory(tests)
endif()
128+
129+
# packing wheel package
#
# 1. setup.py is generated from setup.py.in at configure time.
# 2. After the plugin is linked, a python/ staging tree is created in the build
#    directory and the shared library is copied into it.
# 3. The python_package target (part of ALL) copies __init__.py into the tree
#    and runs `setup.py bdist_wheel`.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
               ${CMAKE_CURRENT_BINARY_DIR}/setup.py)

# NOTE(review): `cmake -E remove` is documented for files; combined with -f it
# appears to leave an existing python/ directory untouched. If the intent is to
# wipe stale staging content, this probably needs `remove_directory` -- confirm
# that nothing else writes into python/ before this step runs.
#
# The library is copied via $<TARGET_FILE:...> instead of a hand-built
# lib<name>.so path, so the step follows the target's actual output name and
# location.
add_custom_command(
  TARGET ${PLUGIN_NAME}
  POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/python/
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/python/
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/python/sdaa-ext
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/
  COMMAND ${CMAKE_COMMAND} -E make_directory
          ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/lib/
  COMMAND
    ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:${PLUGIN_NAME}>
    ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device/
  COMMENT "Creating plugin directories------>>>"
  VERBATIM)

# NOTE(review): none of the commands listed here creates python/.timestamp, so
# unless setup.py does, this rule never has an up-to-date output and the wheel
# is repacked on every build -- confirm whether that is intended.
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp
  COMMAND
    ${CMAKE_COMMAND} -E copy_if_different
    ${CMAKE_SOURCE_DIR}/sdaa_ext/python/__init__.py
    ${CMAKE_CURRENT_BINARY_DIR}/python/paddle_custom_device
  COMMAND python3 ${CMAKE_CURRENT_BINARY_DIR}/setup.py bdist_wheel
  DEPENDS ${PLUGIN_NAME}
  COMMENT "Packing paddle-custom-sdaa whl packages------>>>"
  VERBATIM)

add_custom_target(python_package ALL
                  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/python/.timestamp)

backends/sdaa/README.md

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# PaddlePaddle Custom Device Implementation for Tecorigin SDAA
2+
3+
English | [简体中文](./README_cn.md)
4+
5+
Please refer to the following steps to compile, install and verify the custom device implementation for Tecorigin SDAA.
6+
7+
## Tecorigin SDAA System Requirements
8+
9+
| Module | Version |
10+
| --------- | -------- |
11+
| TecoDriver | 1.1.0 |
12+
| TecoToolkit | 1.1.0 |
13+
14+
## Prepare environment and source code
15+
```bash
16+
# 1. pull PaddlePaddle Tecorigin SDAA development docker image
17+
wget http://mirrors.tecorigin.com/repository/teco-docker-tar-repo/release/ubuntu22.04/x86_64/1.1.0/paddle-1.1.0-paddle_sdaa1.1.0.tar
18+
docker load < paddle-1.1.0-paddle_sdaa1.1.0.tar
19+
20+
# 2. refer to the following commands to start the docker container and activate the conda environment (the PaddlePaddle framework is already installed in the conda environment)
21+
docker run -it --name="paddle_sdaa_dev" --net=host -v $(pwd):/work \
22+
--device=/dev/tcaicard0 --device=/dev/tcaicard1 \
23+
--device=/dev/tcaicard2 --device=/dev/tcaicard3 \
24+
--cap-add SYS_PTRACE --cap-add SYS_ADMIN --shm-size 64g \
25+
jfrog.tecorigin.net/tecotp-docker/release/ubuntu22.04/x86_64/paddle:1.1.0-paddle_sdaa1.1.0 /bin/bash
26+
27+
conda activate paddle_env
28+
29+
# 3. clone the source code
30+
git clone https://github.com/PaddlePaddle/PaddleCustomDevice
31+
cd PaddleCustomDevice
32+
```
33+
34+
## Installation and Verification
35+
36+
### Source Code Compile
37+
38+
```bash
39+
# 1. update cmake
40+
pip install -U cmake
41+
42+
# 2. checkout branch to `develop`
43+
git checkout develop
44+
45+
# 3. execute the following commands to update submodule
46+
git submodule sync
47+
git submodule update --init --recursive
48+
49+
# 4. go to Tecorigin sdaa directory
50+
cd backends/sdaa
51+
52+
# 5. execute compile script
53+
bash compile.sh
54+
55+
# 6. install the generated whl package, which is under build/dist directory
56+
pip install -U build/dist/*.whl
57+
```
58+
59+
### Verification
60+
61+
```bash
62+
# 1. use `paddle_sdaa.utils.run_check` to check whether the paddle-sdaa plugin and the PaddlePaddle framework are installed.
63+
python3 -c "import paddle_sdaa; paddle_sdaa.utils.run_check()"
64+
# expected output:
65+
paddle-sdaa and paddlepaddle are installed successfully!
66+
67+
68+
# 2. list available hardware backends
69+
python3 -c "import paddle; print(paddle.device.get_all_custom_device_type())"
70+
# expected output:
71+
['sdaa']
72+
73+
# 3. run relu forward
74+
python3 -c "import paddle;paddle.set_device('sdaa');print(paddle.nn.functional.relu(paddle.to_tensor([-2., 1.])))"
75+
# expected output:
76+
Tensor(shape=[2], dtype=float32, place=Place(sdaa:0), stop_gradient=True,
77+
[0., 1.])
78+
```
79+
80+
## Train and Inference Demo
81+
82+
```bash
83+
# demo for training, evaluation and inference
84+
python tests/test_MNIST_model.py
85+
86+
# training output as following
87+
Epoch [1/2], Iter [01/14], reader_cost: 1.41201 s, batch_cost: 1.56096 s, ips: 2624.03256 samples/s, eta: 0:00:43
88+
Epoch [1/2], Iter [02/14], reader_cost: 0.70611 s, batch_cost: 0.84809 s, ips: 4829.67512 samples/s, eta: 0:00:22
89+
... ...
90+
Epoch [2/2], Iter [14/14], reader_cost: 0.11122 s, batch_cost: 0.24438 s, ips: 16760.81762 samples/s, eta: 0:00:00
91+
Epoch ID: 2, Epoch time: 3.50429 s, reader_cost: 1.55708 s, batch_cost: 3.42131 s, avg ips: 16363.92196 samples/s
92+
Eval - Epoch ID: 2, Top1 accurary:: 0.84607, Top5 accurary:: 0.98462
93+
94+
# inference output as following
95+
I0307 05:21:33.673595 6583 interpretercore.cc:237] New Executor is Running.
96+
I0307 05:21:33.703184 6583 analysis_predictor.cc:1503] CustomDevice is enabled
97+
... ...
98+
I0307 05:21:33.707281 6583 analysis_predictor.cc:1660] ======= optimize end =======
99+
I0307 05:21:33.707347 6583 naive_executor.cc:164] --- skip [feed], feed -> inputs
100+
I0307 05:21:33.707659 6583 naive_executor.cc:164] --- skip [linear_5.tmp_1], fetch -> fetch
101+
Output data size is 10
102+
Output data shape is (1, 10)
103+
```
104+
105+
## Environment Variables
106+
107+
| Subject | Variable Name | Type | Description | Default Value |
108+
| -------- | -------------------------------- | ------ | --------------------------------- | ------------------------------------------------------------ |
109+
| Debug | CUSTOM_DEVICE_BLACK_LIST| String | Ops in the blacklist will fall back to CPU | "" |
110+
| Profiling | ENABLE_SDPTI | String | enable sdpti | 1 |
111+
| Debug | HIGH_PERFORMANCE_CONV | String | setting HIGH_PERFORMANCE_CONV to `"1"` enables the high performance conv API | 0 |
112+
| Debug | FLAGS_sdaa_runtime_debug | bool | print runtime information | false |
113+
| Feature | FLAGS_sdaa_reuse_event | bool | enable event pool | true |

0 commit comments

Comments
 (0)