File tree Expand file tree Collapse file tree 1 file changed +7
-4
lines changed Expand file tree Collapse file tree 1 file changed +7
-4
lines changed Original file line number Diff line number Diff line change 4
4
vllm :
5
5
image :
6
6
repository : public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
7
- tag : " v0.9.2 "
7
+ tag : " v0.10.0 "
8
8
resources : {}
9
9
LLM_MODEL_ID : meta-llama/Meta-Llama-3-8B-Instruct
10
10
# Uncomment the following model specific settings for DeepSeek models
11
11
VLLM_CPU_KVCACHE_SPACE : 40
12
+ VLLM_CPU_SGL_KERNEL : 1
12
13
13
14
extraCmdArgs : [
14
- " --tensor-parallel-size" , "2",
15
+ " --tensor-parallel-size" , "1",
16
+ " --pipeline-parallel-size" , "1",
15
17
" --block-size" , "128",
16
18
" --dtype" , "bfloat16",
17
- " --max-model-len" ,"5196",
19
+ " --max-model-len" , "5196",
18
20
" --distributed_executor_backend" , "mp",
19
- " --enable_chunked_prefill" ,
21
+ " --max-num-batched-tokens" , "2048",
22
+ " --max-num-seqs" , "256",
20
23
" --enforce-eager" ]
21
24
# resources:
22
25
# requests:
You can’t perform that action at this time.
0 commit comments