@@ -47,17 +47,38 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
     git submodule update --init --recursive --jobs 8 \
     --depth 1 --filter=blob:none

+FROM alpine/git:2.36.3 as vllm-flash-attn-downloader
+WORKDIR /git
+ARG VLLM_FLASH_ATTN_VERSION
+RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
+    https://github.com/vllm-project/flash-attention.git && \
+    cd flash-attention && \
+    git checkout "v${VLLM_FLASH_ATTN_VERSION}" && \
+    git submodule update --init --recursive --jobs 8 \
+    --depth 1 --filter=blob:none
+
 FROM builder-base as vllm-builder
 WORKDIR /workspace
-RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
-    --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
-    /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \
-    LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \
-    python3 -m pip wheel -w /wheels \
+
+ENV LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}"
+
+RUN --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
+    /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt
+
+RUN --mount=type=bind,from=vllm-flash-attn-downloader,source=/git/flash-attention,target=/workspace,rw \
+    python3 -m pip wheel -w /wheels \
     -v --no-cache-dir --no-build-isolation --no-deps \
-    -c /tmp/frozen/constraints.txt \
+    -c /tmp/constraints.txt \
     ./
+
+RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
+    pip3 install /wheels/*.whl && \
+    python3 -m pip wheel -w /wheels \
+    -v --no-cache-dir --no-build-isolation --no-deps \
+    -c /tmp/constraints.txt \
+    ./ && \
+    pip3 uninstall -y vllm-flash-attn
+
 WORKDIR /wheels

 FROM ${BASE_IMAGE} as base
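
For reference, the freezer stage's freeze.sh (defined outside this hunk) generates the constraints file that keeps torch, torchaudio, torchvision, and xformers pinned to their already-installed versions while the flash-attention and vLLM wheels are built. A minimal sketch of such a script, assuming it simply filters `pip freeze` output down to the packages named on the command line (the actual script in the repository may differ):

#!/bin/sh
# Hypothetical sketch of freeze.sh: print pip constraints pinning the currently
# installed versions of the packages passed as arguments, e.g.
#   freeze.sh torch torchaudio torchvision xformers > constraints.txt
set -eu
for pkg in "$@"; do
    # pip freeze emits lines such as "torch==2.1.2"; keep only the requested package.
    # grep exits non-zero if the package is not installed, which aborts the build early.
    python3 -m pip freeze | grep -i "^${pkg}=="
done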
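
The VLLM_FLASH_ATTN_VERSION build argument drives the `git checkout "v${VLLM_FLASH_ATTN_VERSION}"` in the new downloader stage. A hypothetical invocation with a placeholder version and image tag; any other build arguments the full Dockerfile expects (for example the vLLM revision used by vllm-downloader, or BASE_IMAGE for the final stage) are omitted here:

docker build \
  --build-arg VLLM_FLASH_ATTN_VERSION=2.5.9 \
  --target vllm-builder \
  -t vllm-wheels:dev .

Building only the vllm-builder target is a quick way to confirm that both wheel builds succeed before assembling the runtime image.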