Commit 3c7b980

chensuyue and pre-commit-ci[bot] authored and committed
Update LLM model workaround for the vllm issues (opea-project#2182)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: alexsin368 <alex.sin@intel.com>
1 parent 5c06cd6 commit 3c7b980

10 files changed: +17 -18 lines changed

CodeTrans/README.md

Lines changed: 9 additions & 9 deletions

@@ -34,12 +34,12 @@ The table below lists currently available deployment options. They outline in de
 
 ## Validated Configurations
 
-| **Deploy Method** | **LLM Engine** | **LLM Model**                      | **Hardware** |
-| ----------------- | -------------- | ---------------------------------- | ------------ |
-| Docker Compose    | vLLM, TGI      | mistralai/Mistral-7B-Instruct-v0.3 | Intel Gaudi  |
-| Docker Compose    | vLLM, TGI      | mistralai/Mistral-7B-Instruct-v0.3 | Intel Xeon   |
-| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct     | AMD EPYC     |
-| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct     | AMD ROCm     |
-| Helm Charts       | vLLM, TGI      | mistralai/Mistral-7B-Instruct-v0.3 | Intel Gaudi  |
-| Helm Charts       | vLLM, TGI      | mistralai/Mistral-7B-Instruct-v0.3 | Intel Xeon   |
-| Helm Charts       | vLLM, TGI      | mistralai/Mistral-7B-Instruct-v0.3 | AMD ROCm     |
+| **Deploy Method** | **LLM Engine** | **LLM Model**                  | **Hardware** |
+| ----------------- | -------------- | ------------------------------ | ------------ |
+| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | Intel Gaudi  |
+| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | Intel Xeon   |
+| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | AMD EPYC     |
+| Docker Compose    | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | AMD ROCm     |
+| Helm Charts       | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | Intel Gaudi  |
+| Helm Charts       | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | Intel Xeon   |
+| Helm Charts       | vLLM, TGI      | Qwen/Qwen2.5-Coder-7B-Instruct | AMD ROCm     |
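For readers trying out one of these validated configurations, the flow is the same for each row: source the environment script for the target hardware, then bring up the Compose stack. A minimal sketch for the AMD EPYC path touched by this commit (paths come from the file list in this diff; the `HF_TOKEN` placeholder is yours to fill in):

```bash
# Sketch: deploy CodeTrans with Docker Compose on AMD EPYC.
cd CodeTrans/docker_compose/amd/cpu/epyc

# set_env.sh exports host_ip, LLM_MODEL_ID (now Qwen/Qwen2.5-Coder-7B-Instruct),
# LLM_ENDPOINT, LLM_COMPONENT_NAME, and MODEL_CACHE; see its diff below.
source set_env.sh

export HF_TOKEN=your_huggingface_token  # required for model download

docker compose up -d
```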

CodeTrans/code_translation.py

Lines changed: 0 additions & 1 deletion

@@ -1,7 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import asyncio
 import os
 
 from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType

CodeTrans/docker_compose/amd/cpu/epyc/README.md

Lines changed: 1 addition & 1 deletion

@@ -158,7 +158,7 @@ Key parameters are configured via environment variables set before running `dock
 | :-------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :------------------------------------ |
 | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
 | `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
-| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `mistralai/Mistral-7B-Instruct-v0.3` |
+| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
 | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codetrans-epyc-llm-server`). Configured in `compose.yaml`. | `http://${HOST_IP}:8008` |
 | `LLM_COMPONENT_NAME` | LLM component name for the LLM Microservice. | `OpeaTextGenService` |
 | `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeTrans Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7777/v1/codetrans` |
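After the stack is up, the gateway described in this table can be exercised end to end. A hedged sketch (the JSON field names follow the CodeTrans gateway API as commonly documented for OPEA; treat them as assumptions if your release differs, and the port follows the example value in the table):

```bash
# Sketch: ask the CodeTrans MegaService to translate Go to Python.
curl "http://${HOST_IP}:7777/v1/codetrans" \
  -H "Content-Type: application/json" \
  -d '{"language_from": "Golang", "language_to": "Python", "source_code": "fmt.Println(\"hello\")"}'
```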

CodeTrans/docker_compose/amd/cpu/epyc/set_env.sh

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
 host_ip=$(hostname -I | awk '{print $1}')
 export host_ip
 
-export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export LLM_ENDPOINT="http://${host_ip}:8008"
 export LLM_COMPONENT_NAME="OpeaTextGenService"
 export MODEL_CACHE=${model_cache:-"./data"}
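Since the script exports `LLM_MODEL_ID` unconditionally, reverting to the previous model is a per-shell override rather than an edit. A small sketch (the Mistral ID is the one this commit removes):

```bash
# Sketch: override the new default for this session only.
source set_env.sh
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"  # re-export after sourcing
docker compose up -d
```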

CodeTrans/docker_compose/intel/cpu/xeon/README.md

Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ Key parameters are configured via environment variables set before running `dock
 | :-------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :------------------------------------ |
 | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
 | `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
-| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `mistralai/Mistral-7B-Instruct-v0.3` |
+| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
 | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codetrans-xeon-llm-server`). Configured in `compose.yaml`. | `http://${HOST_IP}:8008` |
 | `LLM_COMPONENT_NAME` | LLM component name for the LLM Microservice. | `OpeaTextGenService` |
 | `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeTrans Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7777/v1/codetrans` |
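The serving endpoint itself can be probed before testing the gateway. With vLLM the OpenAI-compatible model listing is the quickest check; TGI exposes `/generate` instead (a sketch, assuming the default port from the table):

```bash
# Sketch: confirm the LLM server is serving the expected model.
curl "http://${HOST_IP}:8008/v1/models"  # vLLM: should list Qwen/Qwen2.5-Coder-7B-Instruct
```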

CodeTrans/docker_compose/intel/hpu/gaudi/README.md

Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ Key parameters are configured via environment variables set before running `dock
 | :-------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :------------------------------------ |
 | `HOST_IP` | External IP address of the host machine. **Required.** | `your_external_ip_address` |
 | `HF_TOKEN` | Your Hugging Face Hub token for model access. **Required.** | `your_huggingface_token` |
-| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `mistralai/Mistral-7B-Instruct-v0.3` |
+| `LLM_MODEL_ID` | Hugging Face model ID for the CodeTrans LLM (used by TGI/vLLM service). Configured within `compose.yaml` environment. | `Qwen/Qwen2.5-Coder-7B-Instruct` |
 | `LLM_ENDPOINT` | Internal URL for the LLM serving endpoint (used by `codetrans-gaudi-llm-server`). Configured in `compose.yaml`. | `http://${HOST_IP}:8008` |
 | `LLM_COMPONENT_NAME` | LLM component name for the LLM Microservice. | `OpeaTextGenService` |
 | `BACKEND_SERVICE_ENDPOINT` | External URL for the CodeTrans Gateway (MegaService). Derived from `HOST_IP` and port `7778`. | `http://${HOST_IP}:7777/v1/codetrans` |

CodeTrans/docker_compose/intel/set_env.sh

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@ source .set_env.sh
 popd > /dev/null
 
 export host_ip=$(hostname -I | awk '{print $1}')
-export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export LLM_ENDPOINT="http://${host_ip}:8008"
 export LLM_COMPONENT_NAME="OpeaTextGenService"
 export NUM_CARDS=1
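Besides the model ID, this shared Intel script exports `NUM_CARDS`, which the Gaudi deployment uses to size the serving instance. A sketch for widening to two cards (whether more than one card helps a 7B model is workload-dependent, and the mapping of `NUM_CARDS` to the engine's parallelism flags is an assumption here):

```bash
# Sketch: source the shared Intel environment, then request 2 Gaudi cards.
source CodeTrans/docker_compose/intel/set_env.sh
export NUM_CARDS=2  # how this maps to tensor parallelism is deployment-specific
```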

CodeTrans/kubernetes/gmc/README.md

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ By default, the LLM model is set to a default value as listed below:
 
 |Service |Model |
 |---------|-------------------------|
-|LLM |mistralai/Mistral-7B-Instruct-v0.3|
+|LLM |Qwen/Qwen2.5-Coder-7B-Instruct|
 
 Change the `MODEL_ID` in `codetrans_xeon.yaml` for your needs.
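"Change the `MODEL_ID` for your needs" is the whole workflow: edit the manifest, then reapply it. A hedged sketch (the `sed` pattern and the namespace are assumptions; only the file name and the `MODEL_ID` key come from this diff):

```bash
# Sketch: point the GMC pipeline at a different model, then reapply.
sed -i 's|MODEL_ID: .*|MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct|' codetrans_xeon.yaml
kubectl apply -f codetrans_xeon.yaml -n codetrans  # namespace is an assumption
```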

CodeTrans/kubernetes/gmc/codetrans_gaudi.yaml

Lines changed: 1 addition & 1 deletion

@@ -29,6 +29,6 @@ spec:
   internalService:
     serviceName: tgi-gaudi-svc
     config:
-      MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+      MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
       endpoint: /generate
     isDownstreamService: true

CodeTrans/kubernetes/gmc/codetrans_xeon.yaml

Lines changed: 1 addition & 1 deletion

@@ -29,6 +29,6 @@ spec:
   internalService:
     serviceName: tgi-service
     config:
-      MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+      MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
       endpoint: /generate
     isDownstreamService: true
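Both manifests leave the LLM step pointing at TGI's `/generate` route, so the downstream service can be smoke-tested on its own once the pods are ready. A sketch (the port-forward mapping is an assumption; the request body is TGI's standard generate schema):

```bash
# Sketch: call the TGI downstream service directly via a port-forward.
kubectl port-forward svc/tgi-service 8080:80 &  # port mapping is an assumption
curl http://localhost:8080/generate \
  -H "Content-Type: application/json" \
  -d '{"inputs": "def add(a, b):", "parameters": {"max_new_tokens": 64}}'
```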
