Skip to content

Commit b19733c

Browse files
committed
update helm for vllm 0.10.0
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
1 parent 414db20 commit b19733c

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

ChatQnA/kubernetes/helm/cpu-values-perf.yaml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@
44
vllm:
55
image:
66
repository: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
7-
tag: "v0.9.2"
7+
tag: "v0.10.0"
88
resources: {}
99
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
1010
# Uncomment the following model-specific settings for DeepSeek models
1111
VLLM_CPU_KVCACHE_SPACE: 40
12+
# Opt in to vLLM's SGL kernels on the CPU backend (the env var is spelled SGL, not SGK).
# NOTE(review): confirm the vllm chart forwards this value into the container environment.
VLLM_CPU_SGL_KERNEL: 1
1213

1314
extraCmdArgs: [
14-
"--tensor-parallel-size", "2",
15+
"--tensor-parallel-size", "1",
16+
"--pipeline-parallel-size", "1",
1517
"--block-size", "128",
1618
"--dtype", "bfloat16",
17-
"--max-model-len","5196",
19+
"--max-model-len", "5196",
1820
"--distributed_executor_backend", "mp",
19-
"--enable_chunked_prefill",
21+
"--max-num-batched-tokens", "2048",
22+
"--max-num-seqs", "256",
2023
"--enforce-eager"]
2124
#resources:
2225
# requests:

0 commit comments

Comments
 (0)