
Commit 467a303

Merge pull request #70 from coreweave/es/vllm-tensorizer
build(vllm-tensorizer): Compile `vllm-flash-attn` from source
2 parents 93acafd + 52fea59 · commit 467a303

2 files changed: +32 −7 lines

.github/workflows/vllm-tensorizer.yml

Lines changed: 5 additions & 1 deletion
@@ -4,6 +4,9 @@ on:
     commit:
       description: 'Commit to build'
       required: true
+    vllm-flash-attn-version:
+      description: 'vllm-flash-attn version to build'
+      required: true
   push:
     paths:
       - "vllm-tensorizer/**"
@@ -20,4 +23,5 @@ jobs:
       folder: vllm-tensorizer
       tag-suffix: ${{ inputs.commit || '51602eefd38250325e541abd28f051ffd7676c3f'}}
       build-args: |
-        COMMIT_HASH=${{ inputs.commit || '51602eefd38250325e541abd28f051ffd7676c3f'}}
+        COMMIT_HASH=${{ inputs.commit || '51602eefd38250325e541abd28f051ffd7676c3f'}}
+        VLLM_FLASH_ATTN_VERSION=${{ inputs.vllm-flash-attn-version || '2.5.9'}}
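A minimal sketch of dispatching this workflow by hand with the GitHub CLI; the commit hash shown is just the pinned fallback from the diff, and both inputs must be supplied for a manual run since they are marked required:

    # Trigger the vllm-tensorizer workflow manually, passing both inputs.
    # Values are illustrative; adjust the commit and version as needed.
    gh workflow run vllm-tensorizer.yml \
      -f commit=51602eefd38250325e541abd28f051ffd7676c3f \
      -f vllm-flash-attn-version=2.5.9

On push-triggered runs no dispatch inputs exist, so the `|| '…'` fallbacks in the workflow expressions supply the pinned commit and the 2.5.9 default instead.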

vllm-tensorizer/Dockerfile

Lines changed: 27 additions & 6 deletions
@@ -47,17 +47,38 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
     git submodule update --init --recursive --jobs 8 \
         --depth 1 --filter=blob:none
 
+FROM alpine/git:2.36.3 as vllm-flash-attn-downloader
+WORKDIR /git
+ARG VLLM_FLASH_ATTN_VERSION
+RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
+        https://github.com/vllm-project/flash-attention.git && \
+    cd flash-attention && \
+    git checkout "v${VLLM_FLASH_ATTN_VERSION}" && \
+    git submodule update --init --recursive --jobs 8 \
+        --depth 1 --filter=blob:none
+
 FROM builder-base as vllm-builder
 WORKDIR /workspace
-RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
-    --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
-    /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \
-    LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \
-    python3 -m pip wheel -w /wheels \
+
+ENV LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}"
+
+RUN --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
+    /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt
+
+RUN --mount=type=bind,from=vllm-flash-attn-downloader,source=/git/flash-attention,target=/workspace,rw \
+    python3 -m pip wheel -w /wheels \
         -v --no-cache-dir --no-build-isolation --no-deps \
-        -c /tmp/frozen/constraints.txt \
+        -c /tmp/constraints.txt \
         ./
 
+RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
+    pip3 install /wheels/*.whl && \
+    python3 -m pip wheel -w /wheels \
+        -v --no-cache-dir --no-build-isolation --no-deps \
+        -c /tmp/constraints.txt \
+        ./ && \
+    pip3 uninstall -y vllm-flash-attn
+
 WORKDIR /wheels
 
 FROM ${BASE_IMAGE} as base
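The reworked `vllm-builder` stage now builds the `vllm-flash-attn` wheel from the checked-out tag first, installs it so the subsequent vLLM wheel build can compile against it, then uninstalls it so only the wheels under `/wheels` carry forward. A minimal local-build sketch, assuming the build context is the `vllm-tensorizer/` folder and that any other declared `ARG`s (for example `BASE_IMAGE`) either have defaults or are passed the same way; the image tag is hypothetical:

    # Build the image locally; the build args mirror what the workflow passes.
    docker build \
      --build-arg COMMIT_HASH=51602eefd38250325e541abd28f051ffd7676c3f \
      --build-arg VLLM_FLASH_ATTN_VERSION=2.5.9 \
      -t vllm-tensorizer:local \
      vllm-tensorizer/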
