
Commit 83668ea (parent 1315a6b)

For the vLLM health check, use the Docker service name instead of host_ip

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
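Background for the change (a hedged sketch, not part of the commit): containers attached to the same Compose network resolve each other by service name through Docker's embedded DNS, so a health probe can target vllm-service:80 directly rather than an externally resolved $host_ip. A minimal illustration with a hypothetical client service and image tags:

```yaml
# Sketch only: hypothetical images/names showing service-name resolution
# on the default Compose network.
services:
  vllm-service:               # reachable as http://vllm-service:80 from peer containers
    image: opea/vllm:latest   # hypothetical tag
    ports:
      - "9009:80"             # host port 9009 maps to container port 80
  client:
    image: curlimages/curl
    # Inside the network, use the service name and the *container* port:
    command: ["curl", "-f", "http://vllm-service:80/health"]
```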

File tree

3 files changed: +19 additions, -2 deletions

ChatQnA/docker_compose/intel/cpu/xeon/compose.perf.yaml

Lines changed: 17 additions & 0 deletions

@@ -8,3 +8,20 @@ services:
       VLLM_CPU_SGL_KERNEL: 1
     entrypoint: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
     command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --dtype bfloat16 --distributed-executor-backend mp --block-size 128 --enforce-eager --tensor-parallel-size $TP_NUM --pipeline-parallel-size $PP_NUM --max-num-batched-tokens $MAX_BATCHED_TOKENS --max-num-seqs $MAX_SEQS
+  vllm-ci-test:
+    image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:a5dd03c1ebc5e4f56f3c9d3dc0436e9c582c978f-cpu
+    container_name: vllm-ci-test
+    volumes:
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 128g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HF_TOKEN}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_CPU_KVCACHE_SPACE: 40
+      ON_CPU: 1
+      REMOTE_HOST: vllm-service
+      REMOTE_PORT: 80
+    entrypoint: tail -f /dev/null

ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 1 addition & 1 deletion

@@ -106,7 +106,7 @@ services:
       HF_HUB_OFFLINE: ${HF_HUB_OFFLINE:-0}
       VLLM_CPU_KVCACHE_SPACE: 40
     healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      test: ["CMD-SHELL", "curl -f http://vllm-service:80/health || exit 1"]
       interval: 10s
       timeout: 10s
       retries: 100
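The healthcheck fields above amount to a simple retry loop: probe the URL, treat a non-2xx response or connection error as failure, wait `interval`, and give up after `retries` attempts. A rough illustration in plain Python against a local stub server (hypothetical stand-in, not the actual vLLM endpoint):

```python
import threading
import time
import urllib.error
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer

# Hypothetical stub standing in for vLLM's /health endpoint, so the
# probe below has something local to hit.
class _Health(BaseHTTPRequestHandler):
    def do_GET(self):
        self.send_response(200 if self.path == "/health" else 404)
        self.end_headers()

    def log_message(self, *args):  # silence per-request logging
        pass

server = HTTPServer(("127.0.0.1", 0), _Health)  # port 0 = pick a free port
threading.Thread(target=server.serve_forever, daemon=True).start()
url = f"http://127.0.0.1:{server.server_port}/health"

def probe(url, interval=0.1, timeout=10, retries=100):
    """Rough analogue of the Compose healthcheck: `curl -f <url>` with retries."""
    for _ in range(retries):
        try:
            with urllib.request.urlopen(url, timeout=timeout) as resp:
                if resp.status == 200:  # curl -f fails on HTTP errors
                    return True
        except (urllib.error.URLError, OSError):
            pass  # service not up yet; wait and retry
        time.sleep(interval)
    return False

healthy = probe(url)
print(healthy)  # prints True once the stub answers /health
server.shutdown()
```

Inside the Compose network the real probe resolves `vllm-service` by name, which is exactly what this commit switches the `curl` target to.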

ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ export HF_TOKEN=${HF_TOKEN}
 export host_ip=${ip_address}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
 export INDEX_NAME="rag-redis"
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.

0 commit comments
