diff --git a/.github/env/_build_image.sh b/.github/env/_build_image.sh index 61b0d902ed..dea55d7e33 100644 --- a/.github/env/_build_image.sh +++ b/.github/env/_build_image.sh @@ -3,4 +3,4 @@ # SPDX-License-Identifier: Apache-2.0 export VLLM_VER=v0.10.0 -export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 +export VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 diff --git a/AgentQnA/docker_image_build/build.yaml b/AgentQnA/docker_image_build/build.yaml index 7db63b6fa8..8c211f10f4 100644 --- a/AgentQnA/docker_image_build/build.yaml +++ b/AgentQnA/docker_image_build/build.yaml @@ -20,7 +20,7 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: agent image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} vllm-rocm: diff --git a/AgentQnA/tests/step1_build_images.sh b/AgentQnA/tests/step1_build_images.sh index 58b5c8d6e8..e5821aaaec 100644 --- a/AgentQnA/tests/step1_build_images.sh +++ b/AgentQnA/tests/step1_build_images.sh @@ -41,7 +41,7 @@ function build_agent_docker_image_gaudi_vllm() { get_genai_comps git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build agent image with --no-cache..." 
diff --git a/AudioQnA/docker_image_build/build.yaml b/AudioQnA/docker_image_build/build.yaml index e7688555c1..a8956b2acf 100644 --- a/AudioQnA/docker_image_build/build.yaml +++ b/AudioQnA/docker_image_build/build.yaml @@ -82,7 +82,7 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: audioqna image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} vllm-rocm: diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh index c24f5ff82e..e29eb3dde3 100644 --- a/AudioQnA/tests/test_compose_on_gaudi.sh +++ b/AudioQnA/tests/test_compose_on_gaudi.sh @@ -27,7 +27,7 @@ function build_docker_images() { git clone https://github.com/HabanaAI/vllm-fork.git cd vllm-fork/ - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 echo "Check out vLLM tag ${VLLM_FORK_VER}" git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 3a5f6d4be3..66ea63728b 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -123,7 +123,7 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: chatqna image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} nginx: diff --git a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh index 58ab7526c2..9848721495 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . 
popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index da8bc25b48..920b85191f 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index 282b5be47d..139185b8ad 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 30a842cd1c..c30a19a1b7 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 282c29766c..686f0b46ff 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -52,7 +52,7 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: codegen image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} dataprep: diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 516624827f..707bbd6d33 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -27,7 +27,7 @@ function build_docker_images() { # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 echo "Check out vLLM tag ${VLLM_FORK_VER}" git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ diff --git a/CodeTrans/docker_image_build/build.yaml b/CodeTrans/docker_image_build/build.yaml index b230d1d4ec..76d0ad6c53 100644 --- a/CodeTrans/docker_image_build/build.yaml +++ b/CodeTrans/docker_image_build/build.yaml @@ -34,7 
+34,7 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: codetrans image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} nginx: diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index 07af411cc1..5c957e50e8 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -26,7 +26,7 @@ function build_docker_images() { popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." diff --git a/DeepResearchAgent/tests/test_compose_on_gaudi.sh b/DeepResearchAgent/tests/test_compose_on_gaudi.sh index c180640201..dfea0269ee 100644 --- a/DeepResearchAgent/tests/test_compose_on_gaudi.sh +++ b/DeepResearchAgent/tests/test_compose_on_gaudi.sh @@ -26,7 +26,7 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index b4a2eb9c54..9b1203b3d1 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -63,6 +63,6 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: docsum image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 2e13e41c9d..30810bbee8 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -47,7 +47,7 @@ function build_docker_images() { popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." diff --git a/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md b/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md index 6dae7dbd48..a0c1e7249f 100644 --- a/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md +++ b/FinanceAgent/docker_compose/intel/hpu/gaudi/README.md @@ -102,7 +102,7 @@ cd vllm-fork VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") echo "Check out vLLM tag ${VLLM_VER}" git checkout ${VLLM_VER} -docker build --no-cache -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +docker build --no-cache -f docker/Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . 
--build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy ``` ## Validate Services diff --git a/FinanceAgent/tests/test_compose_on_gaudi.sh b/FinanceAgent/tests/test_compose_on_gaudi.sh index cb0f594422..3ec6c2920a 100644 --- a/FinanceAgent/tests/test_compose_on_gaudi.sh +++ b/FinanceAgent/tests/test_compose_on_gaudi.sh @@ -96,9 +96,9 @@ function build_vllm_docker_image() { fi cd ./vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null - docker build --no-cache -f Dockerfile.hpu -t $VLLM_IMAGE --shm-size=128g . --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY + docker build --no-cache -f docker/Dockerfile.hpu -t $VLLM_IMAGE --shm-size=128g . --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY if [ $? -ne 0 ]; then echo "$VLLM_IMAGE failed" exit 1 diff --git a/VisualQnA/docker_image_build/build.yaml b/VisualQnA/docker_image_build/build.yaml index e8b1240040..e3f82653fa 100644 --- a/VisualQnA/docker_image_build/build.yaml +++ b/VisualQnA/docker_image_build/build.yaml @@ -46,6 +46,6 @@ services: vllm-gaudi: build: context: vllm-fork - dockerfile: Dockerfile.hpu + dockerfile: docker/Dockerfile.hpu extends: visualqna image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} diff --git a/VisualQnA/tests/test_compose_on_gaudi.sh b/VisualQnA/tests/test_compose_on_gaudi.sh index 1df520183c..1fd231506c 100644 --- a/VisualQnA/tests/test_compose_on_gaudi.sh +++ b/VisualQnA/tests/test_compose_on_gaudi.sh @@ -27,7 +27,7 @@ function build_docker_images() { popd && sleep 1s git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 + VLLM_FORK_VER=v0.8.5.post1+Gaudi-1.21.3 git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ service_list="visualqna visualqna-ui lvm nginx vllm-gaudi" diff --git a/one_click_deploy/common/update_images.sh b/one_click_deploy/common/update_images.sh index 
d78caa0eee..0da2876fc1 100644 --- a/one_click_deploy/common/update_images.sh +++ b/one_click_deploy/common/update_images.sh @@ -123,7 +123,7 @@ get_service_list() { # Defines the configurations for cloning prerequisite repositories. # A generic build function will use these settings. -VLLM_FORK_VER="v0.6.6.post1+Gaudi-1.20.0" +VLLM_FORK_VER="v0.8.5.post1+Gaudi-1.21.3" # Config for examples using vLLM v0.8.3 declare -A VLLM_8_3_CONFIG=(