Skip to content

Commit e2a62dc

Browse files
authored
Bump versions to 0.1.4 (#25)
Changelog: - Initial Topping Support (#10, @xzyaoi, @enothum) - Qwen model support (#24, @xzyaoi) - Initial Shepherd (routing) demo (#20, @xzyaoi)
1 parent c542c23 commit e2a62dc

File tree

121 files changed

+6752
-1212
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

121 files changed

+6752
-1212
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ pyrightconfig.json
55
.local
66
.vscode
77
.zed
8+
.data
9+
*.ipynb

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ html-docs:
1414
sphinx-build -M html docs/sources docs/build
1515
cli-docs:
1616
typer scratchpad.cli.sp utils docs --title "CLI Reference" --name "scratchpad" --output docs/sources/cli.md
17+
monitor-up:
18+
docker compose -f docker/monitor.yaml up -d
19+
monitor-down:
20+
docker compose -f docker/monitor.yaml down

docker/Dockerfile.aarch64-cuda

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,26 @@ LABEL org.opencontainers.image.architecture=arm64
88
ARG CUDA_VERSION=12.4.1
99
ARG PYTHON_VERSION=3.10
1010
ENV DEBIAN_FRONTEND=noninteractive
11+
ENV TRITEIA_COMPUTE_CAP=90
12+
ENV TORCH_CUDA_ARCH_LIST="9.0"
13+
ENV FLASHINFER_ENABLE_AOT="1"
1114

1215
RUN apt update && apt upgrade -y
1316

1417
WORKDIR /scratchpad
1518

1619
COPY . /scratchpad
1720

18-
RUN pip install https://filedn.eu/lougUsdPvd1uJK2jfOYWogH/pypi/flashinfer-0.1.6-cp310-cp310-linux_aarch64.whl
19-
RUN pip install https://filedn.eu/lougUsdPvd1uJK2jfOYWogH/pypi/triteia-0.1.0-cp310-cp310-linux_aarch64.whl
21+
RUN git clone -b v0.1.6 https://github.com/flashinfer-ai/flashinfer.git --recursive && \
22+
cd flashinfer/python && \
23+
pip install --no-build-isolation --verbose --editable .
24+
25+
RUN git clone https://github.com/eth-easl/triteia.git && \
26+
cd triteia && \
27+
git submodule update --init --recursive && \
28+
pip install -e .
2029
RUN pip install -r meta/requirements-extra.txt
2130
RUN pip install .
31+
2232
# todo(xiaozhe): figure out why pynvml is installed in the first place. We should use nvidia-ml-py instead.
2333
RUN pip uninstall pynvml -y

docker/Dockerfile.x86_64-cuda

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,29 @@
1-
FROM nvcr.io/nvidia/pytorch:24.07-py3 AS base
1+
FROM nvcr.io/nvidia/pytorch:24.05-py3 AS base
22

33
LABEL org.opencontainers.image.source=https://github.com/xiaozheyao/Scratchpad
44
LABEL org.opencontainers.image.description="Scratchpad: Adaptive Serving of LMs"
55
LABEL org.opencontainers.image.licenses=Apache-2.0
66
LABEL org.opencontainers.image.architecture=amd64
77

8-
ARG CUDA_VERSION=12.4.1
9-
ARG PYTHON_VERSION=3.10
108
ENV DEBIAN_FRONTEND=noninteractive
9+
ENV TRITEIA_COMPUTE_CAP=80
10+
ENV TORCH_CUDA_ARCH_LIST="8.0"
11+
ENV FLASHINFER_ENABLE_AOT="1"
1112

1213
RUN apt update && apt upgrade -y
1314

1415
WORKDIR /scratchpad
1516

1617
COPY . /scratchpad
1718

18-
RUN pip install flashinfer -i https://flashinfer.ai/whl/cu124/torch2.4/
19-
RUN pip install https://filedn.eu/lougUsdPvd1uJK2jfOYWogH/pypi/triteia-0.1.0-cp310-cp310-linux_x86_64.whl
19+
RUN git clone -b v0.1.6 https://github.com/flashinfer-ai/flashinfer.git --recursive && \
20+
cd flashinfer/python && \
21+
pip install --no-build-isolation --verbose --editable .
22+
23+
RUN git clone https://github.com/eth-easl/triteia.git && \
24+
cd triteia && \
25+
git submodule update --init --recursive && \
26+
pip install -e .
2027
RUN pip install -r meta/requirements-extra.txt
2128
RUN pip install .
2229
RUN pip uninstall pynvml -y

docker/build_image.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ if [ -z "$version" ]; then
88
exit 1
99
fi
1010
echo "Building image for $arch, version $version"
11-
$buildtool build -f docker/Dockerfile.$arch-cuda . -t ghcr.io/xiaozheyao/scratchpad:${version}dev-$arch --build-arg ARCH=$arch
12-
$buildtool push ghcr.io/xiaozheyao/scratchpad:${version}dev-$arch
11+
DOCKER_BUILDKIT=0 $buildtool build -f docker/Dockerfile.$arch-cuda . -t ghcr.io/xiaozheyao/scratchpad:${version}dev-$arch --build-arg ARCH=$arch
12+
$buildtool push ghcr.io/xiaozheyao/scratchpad:${version}dev-$arch

docker/prometheus.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ scrape_configs:
55
- job_name: scratchpad
66
static_configs:
77
- targets:
8-
# - 'host.docker.internal:8080'
9-
- "172.25.4.12:8080"
8+
- 'host.docker.internal:8080'
9+
# - "172.25.4.12:8080"

docs/sources/toppings.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Toppings
2+
3+
```bash
4+
sp serve meta-llama/Llama-3.2-1B-Instruct --host 0.0.0.0 --port 8080 \
5+
--enable-system-controller \
6+
--use-heterogeneous-pool \
7+
--enable-toppings \
8+
--init-toppings lora:ketchup123/llama-3.2-1B-instruct-gsm8k:ketchup123/llama-3.2-1B-instruct-gsm8k,delta:deltazip/meta-llama.Llama-3.2-1B-Instruct.4b_2n4m_128bs:deltazip/meta-llama.Llama-3.2-1B-Instruct.4b_2n4m_128bs-1,delta:deltazip/meta-llama.Llama-3.2-1B-Instruct.4b_2n4m_128bs:deltazip/meta-llama.Llama-3.2-1B-Instruct.4b_2n4m_128bs-2 \
9+
--attention-backend triton \
10+
--sampling-backend pytorch \
11+
--max-toppings-per-batch 2 \
12+
--disable-cuda-graph
13+
```

meta/requirements-extra.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
gguf
2-
pillow
2+
matplotlib
33
prompt_toolkit
44
openai
5+
faiss-cpu

meta/requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ python-multipart
1515
rich
1616
humanize
1717
prometheus_client
18+
tqdm
19+
einops
20+
pillow
21+
tenacity
22+
orjson

scratchpad/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "0.1.4"

0 commit comments

Comments
 (0)