File tree Expand file tree Collapse file tree 2 files changed +19
-1
lines changed
ChatQnA/docker_compose/intel/cpu/xeon Expand file tree Collapse file tree 2 files changed +19
-1
lines changed Original file line number Diff line number Diff line change @@ -8,3 +8,21 @@ services:
8
8
VLLM_CPU_SGL_KERNEL : 1
9
9
entrypoint : ["python3", "-m", "vllm.entrypoints.openai.api_server"]
10
10
command : --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --dtype bfloat16 --distributed-executor-backend mp --block-size 128 --enforce-eager --tensor-parallel-size $TP_NUM --pipeline-parallel-size $PP_NUM --max-num-batched-tokens $MAX_BATCHED_TOKENS --max-num-seqs $MAX_SEQS
11
+ vllm-ci-test :
12
+ image : public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:a5dd03c1ebc5e4f56f3c9d3dc0436e9c582c978f-cpu
13
+ container_name : vllm-ci-test
14
+ volumes :
15
+ - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
16
+ shm_size : 128g
17
+ environment :
18
+ no_proxy : ${no_proxy}
19
+ http_proxy : ${http_proxy}
20
+ https_proxy : ${https_proxy}
21
+ HF_TOKEN : ${HF_TOKEN}
22
+ LLM_MODEL_ID : ${LLM_MODEL_ID}
23
+ VLLM_CPU_KVCACHE_SPACE : 40
24
+ ON_CPU : 1
25
+ REMOTE_HOST : vllm-service
26
+ REMOTE_PORT : 80
27
+ entrypoint : tail -f /dev/null
28
+
Original file line number Diff line number Diff line change @@ -104,7 +104,7 @@ services:
104
104
VLLM_TORCH_PROFILER_DIR : " /mnt"
105
105
VLLM_CPU_KVCACHE_SPACE : 40
106
106
healthcheck :
107
- test : ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
107
+ test : ["CMD-SHELL", "curl -f http://vllm-service:80/health || exit 1"]
108
108
interval : 10s
109
109
timeout : 10s
110
110
retries : 100
You can’t perform that action at this time.
0 commit comments