We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e846deb commit 40cfd77 — Copy full SHA for 40cfd77
ChatQnA/docker_compose/intel/cpu/xeon/compose.perf.yaml
@@ -6,4 +6,5 @@ services:
6
image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.0
7
environment:
8
VLLM_CPU_SGL_KERNEL: 1
9
+ entrypoint: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
10
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --dtype bfloat16 --distributed-executor-backend mp --block-size 128 --enforce-eager --tensor-parallel-size $TP_NUM --pipeline-parallel-size $PP_NUM --max-num-batched-tokens $MAX_BATCHED_TOKENS --max-num-seqs $MAX_SEQS
0 commit comments