@@ -9,6 +9,10 @@ ARG BUILD_TRITON_VERSION=""
9
9
ARG BUILD_TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6 8.9 9.0+PTX"
10
10
# 8.7 is supported in the PyTorch main branch, but not 2.0.0
11
11
12
+ ARG AOCL_BASE="/opt/aocl"
13
+ ARG AOCL_VER="4.2.0"
14
+ ARG AOCL_URL="https://download.amd.com/developer/eula/aocl/aocl-4-2/aocl-linux-aocc-4.2.0.tar.gz"
15
+
12
16
# Clone PyTorch repositories independently from all other build steps
13
17
# for cache-friendliness and parallelization
14
18
FROM alpine/git:2.40.1 as downloader-base
@@ -60,6 +64,30 @@ RUN if [ -n "${BUILD_TRITON_VERSION}" ]; then \
60
64
mkdir triton; \
61
65
fi;
62
66
67
+ FROM alpine/curl:8.7.1 as aocl-downloader
68
+ WORKDIR /tmp/install
69
+
70
+ RUN apk add --no-cache bash
71
+
72
+ ARG AOCL_BASE
73
+ ARG AOCL_VER
74
+ ARG AOCL_URL
75
+
76
+ # Download the AOCL archive from ${AOCL_URL}, unpack it into the current
+ # WORKDIR, and run its install.sh for blis, libflame and utils with
+ # `-t "${AOCL_BASE}" -i lp64`. The ILP64 include/lib directories are then
+ # removed, and amd-libs.cfg, include and lib are symlinked directly under
+ # ${AOCL_BASE} so that later stages can use version-independent paths.
+ # aocl.conf holds the single line "${AOCL_BASE}/lib" for use as an
+ # ld.so.conf.d snippet. The unpacked installer files are deleted last.
+ #
+ # `set -o pipefail` makes the step fail when curl fails, instead of the
+ # pipeline status being decided by tar alone.
+ # NOTE(review): the archive is not checksum-verified; consider pinning
+ # a sha256 for it once a trusted value is available.
+ RUN set -o pipefail && curl -sSfo- "${AOCL_URL}" | tar xzf - --strip-components 1 && \
77
+ INSTALL_LIB() { ./install.sh -l "$1" -t "${AOCL_BASE}" -i lp64; } && \
78
+ INSTALL_LIB blis && \
79
+ INSTALL_LIB libflame && \
80
+ INSTALL_LIB utils && \
81
+ . ./amd-libs.cfg && \
82
+ rm -r "${AOCL_ROOT}/include_ILP64" && \
83
+ rm -r "${AOCL_ROOT}/lib_ILP64" && \
84
+ ln -s "${AOCL_ROOT}/amd-libs.cfg" "${AOCL_BASE}/amd-libs.cfg" && \
85
+ ln -s "${AOCL_ROOT}/include" "${AOCL_BASE}/include" && \
86
+ ln -s "${AOCL_ROOT}/lib" "${AOCL_BASE}/lib" && \
87
+ echo "${AOCL_BASE}/lib" \
88
+ | install -m 0644 /dev/stdin "${AOCL_BASE}/aocl.conf" && \
89
+ rm -r ./*
90
+
63
91
64
92
# # Build PyTorch on a builder image.
65
93
FROM ${BUILDER_BASE_IMAGE} as builder
@@ -70,12 +98,14 @@ ARG BUILD_CCACHE_SIZE="1Gi"
70
98
# ninja-build, ccache, and lld are optional but improve the build
71
99
RUN apt-get -qq update && apt-get -qq install -y \
72
100
libncurses5 python3 python3-pip git apt-utils ssh ca-certificates \
73
- libpng-dev libjpeg-dev pkg-config python3-distutils \
101
+ libomp5 libpng-dev libjpeg-dev pkg-config python3-distutils \
74
102
build-essential ninja-build && \
75
103
apt-get clean && \
76
104
/usr/bin/python3 -m pip install --no-cache-dir --upgrade pip && \
77
105
update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
78
- update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
106
+ update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
107
+ ln -s libomp.so.5 /usr/lib/x86_64-linux-gnu/libomp.so && \
108
+ ldconfig
79
109
80
110
RUN mkdir /tmp/ccache-install && \
81
111
cd /tmp/ccache-install && \
@@ -116,6 +146,37 @@ RUN CODENAME="$(lsb_release -cs)" && \
116
146
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11 && \
117
147
update-alternatives --install /usr/bin/ld ld /usr/bin/ld.lld-17 1
118
148
149
+ # Install AOCL-BLAS and AOCL-LAPACK
150
+ # See: https://www.amd.com/en/developer/aocl/dense.html
151
+ ARG AOCL_BASE
152
+ COPY --from=aocl-downloader "${AOCL_BASE}" "${AOCL_BASE}"
153
+
154
+ # `ldconfig` lets the dynamic linker access AOCL libraries
155
+ RUN install -m 0644 -t /etc/ld.so.conf.d "${AOCL_BASE}/aocl.conf" && \
156
+ ldconfig
157
+
158
+ # These environment variables are only for the build stage,
159
+ # and register paths to build-time AOCL resources.
160
+ # This could alternatively be done by invoking `. "${AOCL_BASE}/amd-libs.cfg"`
161
+ # in every RUN compilation step, but this will make sure it is never missed.
162
+ #
163
+ # PyTorch's logic to find LAPACK during CMake configuration
164
+ # additionally requires its installed path to be in:
165
+ # - One of:
166
+ # - /usr/local/lib, or
167
+ # - /usr/lib, or
168
+ # - /usr/local/lib64, or
169
+ # - /usr/lib64, or
170
+ # - /usr/lib/aarch64-linux-gnu, or
171
+ # - $LD_LIBRARY_PATH
172
+ # It skips $LIBRARY_PATH and ld's normally configured paths,
173
+ # so it is necessary to add $LD_LIBRARY_PATH here as well.
174
+ # See: https://github.com/pytorch/pytorch/blob/v2.3.0/cmake/Modules/FindLAPACK.cmake#L56-L59
175
+ ENV C_INCLUDE_PATH="${AOCL_BASE}/include${C_INCLUDE_PATH:+:$C_INCLUDE_PATH}" \
176
+ CPLUS_INCLUDE_PATH="${AOCL_BASE}/include${CPLUS_INCLUDE_PATH:+:$CPLUS_INCLUDE_PATH}" \
177
+ LD_LIBRARY_PATH="${AOCL_BASE}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" \
178
+ LIBRARY_PATH="${AOCL_BASE}/lib${LIBRARY_PATH:+:$LIBRARY_PATH}"
179
+
119
180
RUN mkdir /build /build/dist
120
181
WORKDIR /build
121
182
COPY --chmod=755 effective_cpu_count.sh .
@@ -190,6 +251,19 @@ ENV TORCH_CUDA_ARCH_LIST=$BUILD_TORCH_CUDA_ARCH_LIST
190
251
#
191
252
# This step is itself cacheable as long as the downloaded files (and ARCH_LIST)
192
253
# remain the same.
254
+ #
255
+ # NB: This cannot specify BLAS=FLAME directly, because PyTorch (v2.3.0)'s code
256
+ # to explicitly choose a BLAS implementation is missing that option
257
+ # (See: https://github.com/pytorch/pytorch/blob/v2.3.0/cmake/Dependencies.cmake#L195-L266),
258
+ # and using BLAS=blis makes it ignore the libflame LAPACK library, because
259
+ # that triggers its FindBLIS logic rather than FindBLAS, and FindLAPACK depends
260
+ # on a variable set only during FindBLAS (BLAS_INFO=FLAME)
261
+ # (See: https://github.com/pytorch/pytorch/blob/v2.3.0/cmake/Modules/FindLAPACK.cmake#L176-L189).
262
+ # Thus, we have to force it to use its generic FindBLAS logic,
263
+ # and narrow it down from there by specifying WITH_BLAS=FLAME
264
+ # (See: https://github.com/pytorch/pytorch/blob/v2.3.0/cmake/Modules/FindBLAS.cmake#L259-L271).
265
+ # Without WITH_BLAS, it would detect the BLAS implementation as
266
+ # BLAS_INFO=blis instead of BLAS_INFO=FLAME and wouldn't include LAPACK either.
193
267
RUN --mount=type=bind,from=pytorch-downloader,source=/git/pytorch,target=pytorch/,rw \
194
268
--mount=type=cache,target=/ccache \
195
269
export MAX_JOBS="$(./scale.sh " $(./effective_cpu_count.sh)" 3 32)" && \
@@ -208,16 +282,16 @@ RUN --mount=type=bind,from=pytorch-downloader,source=/git/pytorch,target=pytorch
208
282
UCC_HOME=${HPCX_UCC_DIR} UCX_HOME=${HPCX_UCX_DIR} \
209
283
USE_NCCL_WITH_UCC=1 \
210
284
USE_UCC=1 USE_SYSTEM_UCC=1; fi; } && \
211
- USE_OPENCV=1 \
212
285
BUILD_TORCH=ON \
213
286
BUILD_TEST=0 \
214
287
CUDA_HOST_COMPILER=cc \
215
288
USE_CUDA=1 \
216
289
USE_NNPACK=1 \
217
290
CC=cc \
218
291
CXX=c++ \
219
- USE_EIGEN_FOR_BLAS=ON \
220
- USE_MKL=OFF \
292
+ USE_BLAS=1 \
293
+ USE_LAPACK=1 \
294
+ WITH_BLAS=FLAME \
221
295
PYTORCH_BUILD_VERSION="$(../version-string.sh " $TORCH_VERSION")" \
222
296
PYTORCH_BUILD_NUMBER=0 \
223
297
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
@@ -254,8 +328,6 @@ RUN --mount=type=bind,from=torchvision-downloader,source=/git/vision,target=visi
254
328
USE_NNPACK=1 \
255
329
CC=cc \
256
330
CXX=c++ \
257
- USE_EIGEN_FOR_BLAS=ON \
258
- USE_MKL=OFF \
259
331
BUILD_VERSION="$(../version-string.sh " $TORCH_VISION_VERSION")" \
260
332
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
261
333
python3 setup.py bdist_wheel --dist-dir ../dist
@@ -290,8 +362,6 @@ RUN --mount=type=bind,from=torchaudio-downloader,source=/git/audio,target=audio/
290
362
USE_NNPACK=1 \
291
363
CC=cc \
292
364
CXX=c++ \
293
- USE_EIGEN_FOR_BLAS=ON \
294
- USE_MKL=OFF \
295
365
BUILD_VERSION="$(../version-string.sh " $TORCH_AUDIO_VERSION")" \
296
366
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
297
367
python3 setup.py bdist_wheel --dist-dir ../dist
@@ -304,14 +374,16 @@ ENV DEBIAN_FRONTEND=noninteractive
304
374
# Install core packages
305
375
RUN apt-get -qq update && apt-get -qq install -y \
306
376
libncurses5 python3 python3-pip python3-distutils \
307
- libpng16-16 libjpeg-turbo8 libsodium23 \
377
+ libomp5 libpng16-16 libjpeg-turbo8 libsodium23 \
308
378
curl git apt-utils ssh ca-certificates tmux nano vim-tiny sudo bash \
309
379
rsync htop wget unzip tini && \
380
+ apt-get clean && \
310
381
/usr/bin/python3 -m pip install --no-cache-dir --upgrade pip && \
311
382
update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
312
383
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
313
384
update-alternatives --install /usr/bin/vim vim /usr/bin/vim.tiny 1 && \
314
- apt-get clean
385
+ ln -s libomp.so.5 /usr/lib/x86_64-linux-gnu/libomp.so && \
386
+ ldconfig
315
387
316
388
RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
317
389
software-properties-common && \
@@ -323,6 +395,15 @@ RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
323
395
} && \
324
396
{ SETUP_LIBSTDCXX || { sleep "$(shuf -i10-20 -n1)" && SETUP_LIBSTDCXX; }; }
325
397
398
+ # Install AOCL-BLAS and AOCL-LAPACK
399
+ # See: https://www.amd.com/en/developer/aocl/dense.html
400
+ ARG AOCL_BASE
401
+ COPY --from=aocl-downloader "${AOCL_BASE}" "${AOCL_BASE}"
402
+
403
+ # `ldconfig` lets the dynamic linker access AOCL libraries
404
+ RUN install -m 0644 -t /etc/ld.so.conf.d "${AOCL_BASE}/aocl.conf" && \
405
+ ldconfig
406
+
326
407
ARG BUILD_TORCH_VERSION
327
408
ARG BUILD_TORCH_VISION_VERSION
328
409
ARG BUILD_TORCH_AUDIO_VERSION
0 commit comments