Skip to content

Commit 2ee36fc

Browse files
committed
adding CI test and new cpu-value-perf.yaml to address review feedback
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
1 parent b45a99b commit 2ee36fc

File tree

3 files changed

+26
-16
lines changed

3 files changed

+26
-16
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
vllm:
5+
image:
6+
repository: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
7+
tag: "v0.9.2"
8+
resources: {}
9+
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
10+
# Model-specific settings. The KV cache size below is already enabled; for DeepSeek models, also uncomment the resources block further down
11+
VLLM_CPU_KVCACHE_SPACE: 40
12+
13+
extraCmdArgs: [
14+
"--tensor-parallel-size", "2",
15+
"--block-size", "128",
16+
"--dtype", "bfloat16",
17+
"--max-model-len","5196",
18+
"--distributed_executor_backend", "mp",
19+
"--enable_chunked_prefill",
20+
"--enforce-eager"]
21+
#resources:
22+
# requests:
23+
# memory: 60Gi # 40G for the KV cache plus 20G for DeepSeek-R1-Distill-Qwen-7B; adjust for other models

ChatQnA/kubernetes/helm/cpu-values.yaml

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,9 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
vllm:
5-
image:
6-
repository: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
7-
tag: "v0.9.2"
8-
resources: {}
95
LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
106
# Uncomment the following model specific settings for DeepSeek models
11-
VLLM_CPU_KVCACHE_SPACE: 40
12-
13-
extraCmdArgs: [
14-
"--tensor-parallel-size", "2",
15-
"--block-size", "128",
16-
"--dtype", "bfloat16",
17-
"--max-model-len","5196",
18-
"--distributed_executor_backend", "mp",
19-
"--enable_chunked_prefill",
20-
"--enforce-eager"]
7+
#VLLM_CPU_KVCACHE_SPACE: 40
218
#resources:
229
# requests:
2310
# memory: 60Gi # 40G for the KV cache plus 20G for DeepSeek-R1-Distill-Qwen-7B; adjust for other models

ChatQnA/tests/test_compose_on_xeon.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ function start_services() {
4343
source set_env.sh
4444

4545
# Start Docker Containers
46-
docker compose -f compose.yaml -f compose.telemetry.yaml up -d --quiet-pull > ${LOG_PATH}/start_services_with_compose.log
46+
docker compose -f compose.yaml -f compose.telemetry.yaml -f compose.perf.yaml up -d --quiet-pull > ${LOG_PATH}/start_services_with_compose.log
4747
n=0
4848
until [[ "$n" -ge 100 ]]; do
4949
docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
@@ -163,7 +163,7 @@ function validate_frontend() {
163163

164164
function stop_docker() {
165165
cd $WORKPATH/docker_compose/intel/cpu/xeon
166-
docker compose -f compose.yaml -f compose.telemetry.yaml down
166+
docker compose -f compose.yaml -f compose.telemetry.yaml -f compose.perf.yaml down
167167
}
168168

169169
function main() {

0 commit comments

Comments
 (0)