Skip to content

Commit f484e04

Browse files
committed
For the vLLM health check, use the Docker service name instead of host_ip
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
1 parent 93cc59b commit f484e04

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

ChatQnA/docker_compose/intel/cpu/xeon/compose.perf.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,21 @@ services:
88
VLLM_CPU_SGL_KERNEL: 1
99
entrypoint: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
1010
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --dtype bfloat16 --distributed-executor-backend mp --block-size 128 --enforce-eager --tensor-parallel-size $TP_NUM --pipeline-parallel-size $PP_NUM --max-num-batched-tokens $MAX_BATCHED_TOKENS --max-num-seqs $MAX_SEQS
11+
vllm-ci-test:
12+
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:a5dd03c1ebc5e4f56f3c9d3dc0436e9c582c978f-cpu
13+
container_name: vllm-ci-test
14+
volumes:
15+
- "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
16+
shm_size: 128g
17+
environment:
18+
no_proxy: ${no_proxy}
19+
http_proxy: ${http_proxy}
20+
https_proxy: ${https_proxy}
21+
HF_TOKEN: ${HF_TOKEN}
22+
LLM_MODEL_ID: ${LLM_MODEL_ID}
23+
VLLM_CPU_KVCACHE_SPACE: 40
24+
ON_CPU: 1
25+
REMOTE_HOST: vllm-service
26+
REMOTE_PORT: 80
27+
entrypoint: tail -f /dev/null
28+

ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ services:
104104
VLLM_TORCH_PROFILER_DIR: "/mnt"
105105
VLLM_CPU_KVCACHE_SPACE: 40
106106
healthcheck:
107-
test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
107+
test: ["CMD-SHELL", "curl -f http://vllm-service:80/health || exit 1"]
108108
interval: 10s
109109
timeout: 10s
110110
retries: 100

0 commit comments

Comments
 (0)