From fd3824d73aa52928d48148b4496cf506e486df1b Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 14:19:23 +0700
Subject: [PATCH 01/24] Add Config for vLLM

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/compose_vllm.yaml            | 265 ++++++++++++++++++
 .../amd/gpu/rocm/set_env_vllm.sh              |  44 +++
 2 files changed, 309 insertions(+)
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
new file mode 100644
index 0000000000..5e5c069223
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -0,0 +1,265 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  worker-finqa-agent:
+    image: opea/agent:latest
+    container_name: finqa-agent-endpoint
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    ports:
+      - "9095:9095"
+    ipc: host
+    environment:
+      ip_address: ${ip_address}
+      strategy: react_llama
+      with_memory: false
+      recursion_limit: ${recursion_limit_worker}
+      llm_engine: vllm
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      temperature: ${TEMPERATURE}
+      max_new_tokens: ${MAX_TOKENS}
+      stream: false
+      tools: /home/user/tools/finqa_agent_tools.yaml
+      custom_prompt: /home/user/prompts/finqa_prompt.py
+      require_human_feedback: false
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL_VECTOR: $REDIS_URL_VECTOR
+      REDIS_URL_KV: $REDIS_URL_KV
+      TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT
+      port: 9095
+
+  worker-research-agent:
+    image: opea/agent:latest
+    container_name: research-agent-endpoint
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    ports:
+      - "9096:9096"
+    ipc: host
+    environment:
+      ip_address: ${ip_address}
+      strategy: react_llama
+      with_memory: false
+      recursion_limit: 25
+      llm_engine: vllm
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      stream: false
+      tools: /home/user/tools/research_agent_tools.yaml
+      custom_prompt: /home/user/prompts/research_prompt.py
+      require_human_feedback: false
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      FINNHUB_API_KEY: ${FINNHUB_API_KEY}
+      FINANCIAL_DATASETS_API_KEY: ${FINANCIAL_DATASETS_API_KEY}
+      port: 9096
+
+  supervisor-react-agent:
+    image: opea/agent:latest
+    container_name: supervisor-agent-endpoint
+    depends_on:
+      - worker-finqa-agent
+      - worker-research-agent
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    ports:
+      - "9090:9090"
+    ipc: host
+    environment:
+      ip_address: ${ip_address}
+      strategy: react_llama
+      with_memory: true
+      recursion_limit: ${recursion_limit_supervisor}
+      llm_engine: vllm
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      temperature: ${TEMPERATURE}
+      max_new_tokens: ${MAX_TOKENS}
+      stream: true
+      tools: /home/user/tools/supervisor_agent_tools.yaml
+      custom_prompt: /home/user/prompts/supervisor_prompt.py
+      require_human_feedback: false
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      WORKER_FINQA_AGENT_URL: $WORKER_FINQA_AGENT_URL
+      WORKER_RESEARCH_AGENT_URL: $WORKER_RESEARCH_AGENT_URL
+      DOCSUM_ENDPOINT: $DOCSUM_ENDPOINT
+      REDIS_URL_VECTOR: $REDIS_URL_VECTOR
+      REDIS_URL_KV: $REDIS_URL_KV
+      TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT
+      port: 9090
+
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
+    container_name: docsum-vllm-service
+    ports:
+      - "${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}:8011"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      VLLM_USE_TRITON_FLASH_ATTENTION: 0
+      PYTORCH_JIT: 0
+    healthcheck:
+      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    volumes:
+      - "${MODEL_CACHE:-./data}:/data"
+    shm_size: 20G
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/:/dev/dri/
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+    security_opt:
+      - seccomp:unconfined
+      - apparmor=unconfined
+    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
+    ipc: host
+
+  docsum-llm-textgen:
+    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
+    container_name: docsum-vllm-service
+    ports:
+      - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      VLLM_USE_TRITON_FLASH_ATTENTION: 0
+      PYTORCH_JIT: 0
+    healthcheck:
+      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    volumes:
+      - "${MODEL_CACHE:-./data}:/data"
+    shm_size: 20G
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/:/dev/dri/
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+    security_opt:
+      - seccomp:unconfined
+      - apparmor=unconfined
+    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
+    ipc: host
+
+  agent-ui:
+    image: opea/agent-ui:latest
+    container_name: agent-ui
+    environment:
+      host_ip: ${host_ip}
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    ports:
+      - "5175:8080"
+    ipc: host
+
+  tei-embedding-serving:
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    container_name: tei-embedding-serving
+    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
+    ports:
+      - "${TEI_EMBEDDER_PORT:-10221}:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      host_ip: ${host_ip}
+      HF_TOKEN: ${HF_TOKEN}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 48
+
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    ports:
+      - "${REDIS_PORT1:-6379}:6379"
+      - "${REDIS_PORT2:-8001}:8001"
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  redis-kv-store:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-kv-store
+    ports:
+      - "${REDIS_PORT3:-6380}:6379"
+      - "${REDIS_PORT4:-8002}:8001"
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  dataprep-redis-finance:
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
+    container_name: dataprep-redis-server-finance
+    depends_on:
+      redis-vector-db:
+        condition: service_healthy
+      redis-kv-store:
+        condition: service_healthy
+      tei-embedding-serving:
+        condition: service_healthy
+    ports:
+      - "${DATAPREP_PORT:-6007}:5000"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME}
+      REDIS_URL_VECTOR: ${REDIS_URL_VECTOR}
+      REDIS_URL_KV: ${REDIS_URL_KV}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL: ${LLM_MODEL}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      LOGFLAG: true
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
new file mode 100644
index 0000000000..acb0a484fb
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -0,0 +1,44 @@
+export ip_address=$(hostname -I | awk '{print $1}')
+export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
+echo "TOOLSET_PATH=${TOOLSET_PATH}"
+export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/
+echo "PROMPT_PATH=${PROMPT_PATH}"
+export recursion_limit_worker=12
+export recursion_limit_supervisor=10
+
+vllm_port=8086
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}"
+export TEMPERATURE=0.5
+export MAX_TOKENS=4096
+
+export WORKER_FINQA_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
+export WORKER_RESEARCH_AGENT_URL="http://${ip_address}:9096/v1/chat/completions"
+
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:10221"
+export REDIS_URL_VECTOR="redis://${ip_address}:6379"
+export REDIS_URL_KV="redis://${ip_address}:6380"
+
+export MAX_INPUT_TOKENS=2048
+export MAX_TOTAL_TOKENS=4096
+export DOCSUM_COMPONENT_NAME="OpeaDocSumvLLM"
+export DOCSUM_ENDPOINT="http://${ip_address}:9000/v1/docsum"
+
+export FINNHUB_API_KEY=${FINNHUB_API_KEY}
+export FINANCIAL_DATASETS_API_KEY=${FINANCIAL_DATASETS_API_KEY}
+
+
+export DATAPREP_PORT="6007"
+export TEI_EMBEDDER_PORT="10221"
+export REDIS_URL_VECTOR="redis://${ip_address}:6379"
+export REDIS_URL_KV="redis://${ip_address}:6380"
+export LLM_MODEL=$model
+export LLM_ENDPOINT="http://${ip_address}:${vllm_port}"
+export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}"
+
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export MAX_LEN=16384

From ee743837b943e8a678dae37ec9f4e4b009011b01 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 14:38:45 +0700
Subject: [PATCH 02/24] Update compose_vllm.yaml

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/compose_vllm.yaml            | 42 +++++++------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index 5e5c069223..061262dbbb 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -138,40 +138,26 @@ services:
     ipc: host
 
   docsum-llm-textgen:
-    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
-    container_name: docsum-vllm-service
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+    container_name: docsum-llm-server
+    depends_on:
+      docsum-vllm-service:
+        condition: service_healthy
     ports:
-      - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011"
+      - "${DOCSUM_LLM_SERVER_PORT}:9000"
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      VLLM_USE_TRITON_FLASH_ATTENTION: 0
-      PYTORCH_JIT: 0
-    healthcheck:
-      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
-      interval: 10s
-      timeout: 10s
-      retries: 100
-    volumes:
-      - "${MODEL_CACHE:-./data}:/data"
-    shm_size: 20G
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri/:/dev/dri/
-    cap_add:
-      - SYS_PTRACE
-    group_add:
-      - video
-    security_opt:
-      - seccomp:unconfined
-      - apparmor=unconfined
-    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
-    ipc: host
+      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
+      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+      DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM"
+      LOGFLAG: ${LOGFLAG:-False}
+    restart: unless-stopped
 
   agent-ui:
     image: opea/agent-ui:latest

From 6c50388faccb7d54a3bdcd44f95d5eeb2fa3dac7 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 14:40:59 +0700
Subject: [PATCH 03/24] Update compose_vllm.yaml

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index 061262dbbb..40e43171d1 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -141,7 +141,7 @@ services:
     image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: docsum-llm-server
     depends_on:
-      docsum-vllm-service:
+      vllm-service:
         condition: service_healthy
     ports:
       - "${DOCSUM_LLM_SERVER_PORT}:9000"

From f762e43bd82bb8b416561bc2135f595f0049de06 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 14:52:23 +0700
Subject: [PATCH 04/24] Update example config

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../docker_compose/amd/gpu/rocm/compose_vllm.yaml         | 8 ++++----
 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh  | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index 40e43171d1..180e5923a0 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -110,14 +110,14 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       VLLM_USE_TRITON_FLASH_ATTENTION: 0
       PYTORCH_JIT: 0
     healthcheck:
-      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
+      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
       interval: 10s
       timeout: 10s
       retries: 100
@@ -134,7 +134,7 @@ services:
     security_opt:
       - seccomp:unconfined
       - apparmor=unconfined
-    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
+    command: "--model ${LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
     ipc: host
 
   docsum-llm-textgen:
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
index acb0a484fb..0b7ff0aa51 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -7,7 +7,8 @@ echo "PROMPT_PATH=${PROMPT_PATH}"
 export recursion_limit_worker=12
 export recursion_limit_supervisor=10
 
-vllm_port=8086
+export vllm_port=8086
+export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port}
 export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}"
 export TEMPERATURE=0.5

From 277a698670db07a67271d682859eb71d536c09f2 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 14:55:46 +0700
Subject: [PATCH 05/24] Update set_env_vllm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
index 0b7ff0aa51..82ccff1caf 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -1,5 +1,5 @@
 export ip_address=$(hostname -I | awk '{print $1}')
-export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN}
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
 echo "TOOLSET_PATH=${TOOLSET_PATH}"
 export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/

From c5e3ab29886ea5b2916b4b26ab53df0dfd0989cb Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 15:08:41 +0700
Subject: [PATCH 06/24] Update set_env_vllm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
index 82ccff1caf..3d0b258c0f 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -1,5 +1,6 @@
 export ip_address=$(hostname -I | awk '{print $1}')
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
 echo "TOOLSET_PATH=${TOOLSET_PATH}"
 export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/

From 14c2fbbea10c6cf3c51d45f5295622d9a1903d12 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 15:39:26 +0700
Subject: [PATCH 07/24] Update set_env_vllm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
index 3d0b258c0f..86c71187bb 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
@@ -1,4 +1,5 @@
 export ip_address=$(hostname -I | awk '{print $1}')
+export HOST_IP=${ip_address}
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
@@ -10,7 +11,7 @@ export recursion_limit_supervisor=10
 
 export vllm_port=8086
 export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port}
-export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}"
 export TEMPERATURE=0.5
 export MAX_TOKENS=4096
@@ -42,5 +43,4 @@ export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}"
 
-export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export MAX_LEN=16384

From a4f154fce4a5e909b023d65d25a72057cf192f5f Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 15:40:51 +0700
Subject: [PATCH 08/24] Update compose_vllm.yaml

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../docker_compose/amd/gpu/rocm/compose_vllm.yaml      | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index 180e5923a0..0d09789f73 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -150,11 +150,11 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
-      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
-      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
-      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      MAX_INPUT_TOKENS: ${INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM"
       LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

From 8cf40257bb9b964ce88f5b58b6f2a5a53838daf0 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 25 Apr 2025 15:52:13 +0700
Subject: [PATCH 09/24] Update compose_vllm.yaml

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index 0d09789f73..b9c8a8f7e0 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -184,7 +184,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      host_ip: ${host_ip}
+      host_ip: ${HOST_IP}
       HF_TOKEN: ${HF_TOKEN}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]

From b38ec3e03d6d1ff7c60aec670546610d0e9c8c3a Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Mon, 28 Apr 2025 15:08:32 +0700
Subject: [PATCH 10/24] Refactor FinanceAgent for rocm

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/.set_env_vllm.sh.kate-swp    | Bin 0 -> 749 bytes
 .../docker_compose/amd/gpu/rocm/compose.yaml  | 135 +++++++++++
 .../amd/gpu/rocm/compose_vllm.yaml            | 214 +-----------------
 .../amd/gpu/rocm/dataprep_compose.yaml        |  82 +++++++
 .../{set_env_vllm.sh => launch_agents.sh}     |  19 +-
 .../amd/gpu/rocm/launch_dataprep.sh           |  15 ++
 .../amd/gpu/rocm/launch_vllm.sh               |   7 +
 7 files changed, 246 insertions(+), 226 deletions(-)
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/dataprep_compose.yaml
 rename FinanceAgent/docker_compose/amd/gpu/rocm/{set_env_vllm.sh => launch_agents.sh} (71%)
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/launch_dataprep.sh
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp
new file mode 100644
index 0000000000000000000000000000000000000000..5d6d484bbdb4ba4f0504a0b5b8b09968896ecfd2
GIT binary patch
literal 749
zcmYk(NlF7j7>41>JeYZo>BMf#oTxDhf{LyphzMc~xNs9B3q`?&;6Xf);KoCE2Jc|M
z#V1&hm;Xu8O{HRtDO@aHSML06`Qn~EJgwa6_QV)d%9cMTkMBPp&hz5->;C)3Yc=oY
z*Z$7f|FF)%$d1PBf}wB>2Eqa83suk)CZH?qgO1>Ww$NDk!<dZ7kPJwl^hlR<X7+fV
zKeA4Fhs)%z0chFL7<;^?u*HreIH2(#b-|vsCfKu9g+rdH2=*FE!WBD3;S%J93dsFO
zRyblOBkX{*Fa;^043a_{Bm{ePalvCJCfEy!3Y+XigcA@JMj#~E;{*kJI{~50m3zb`
z8>B^=#3A;NTW6}18mW>Bu`ehw6-j~QNseSmhNMY~tdS&1kT{8vD2b3T36UT_<8STp
HO<?v5(bF~^

literal 0
HcmV?d00001

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
new file mode 100644
index 0000000000..62af173adb
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
@@ -0,0 +1,135 @@
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  worker-finqa-agent:  # FinQA worker agent; host port 9095 -> container 9095
+    container_name: finqa-agent-endpoint
+    image: opea/agent:latest
+    ipc: host
+    ports:
+      - "9095:9095"
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      ip_address: ${ip_address}
+      port: 9095  # same number as the published port above
+      strategy: react_llama
+      recursion_limit: ${recursion_limit_worker}
+      with_memory: false
+      stream: false
+      require_human_feedback: false
+      tools: /home/user/tools/finqa_agent_tools.yaml  # under the TOOLSET_PATH mount
+      custom_prompt: /home/user/prompts/finqa_prompt.py  # under the PROMPT_PATH mount
+      llm_engine: vllm
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      temperature: ${TEMPERATURE}
+      max_new_tokens: ${MAX_TOKENS}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      REDIS_URL_VECTOR: ${REDIS_URL_VECTOR}
+      REDIS_URL_KV: ${REDIS_URL_KV}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+
+  worker-research-agent:  # research worker agent; host port 9096 -> container 9096
+    container_name: research-agent-endpoint
+    image: opea/agent:latest
+    ipc: host
+    ports:
+      - "9096:9096"
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      ip_address: ${ip_address}
+      port: 9096  # same number as the published port above
+      strategy: react_llama
+      recursion_limit: 25  # fixed here; the other agents read theirs from the environment
+      with_memory: false
+      stream: false
+      require_human_feedback: false
+      tools: /home/user/tools/research_agent_tools.yaml  # under the TOOLSET_PATH mount
+      custom_prompt: /home/user/prompts/research_prompt.py  # under the PROMPT_PATH mount
+      llm_engine: vllm
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      FINNHUB_API_KEY: ${FINNHUB_API_KEY}
+      FINANCIAL_DATASETS_API_KEY: ${FINANCIAL_DATASETS_API_KEY}
+
+  supervisor-react-agent:  # supervisor agent; host port 9090 -> container 9090
+    container_name: supervisor-agent-endpoint
+    image: opea/agent:latest
+    ipc: host
+    depends_on:  # short form: both workers are started before this service
+      - worker-finqa-agent
+      - worker-research-agent
+    ports:
+      - "9090:9090"
+    volumes:
+      - ${TOOLSET_PATH}:/home/user/tools/
+      - ${PROMPT_PATH}:/home/user/prompts/
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      ip_address: ${ip_address}
+      port: 9090  # same number as the published port above
+      strategy: react_llama
+      recursion_limit: ${recursion_limit_supervisor}
+      with_memory: true  # the two worker services set this to false
+      stream: true  # the two worker services set this to false
+      require_human_feedback: false
+      tools: /home/user/tools/supervisor_agent_tools.yaml  # under the TOOLSET_PATH mount
+      custom_prompt: /home/user/prompts/supervisor_prompt.py  # under the PROMPT_PATH mount
+      llm_engine: vllm
+      llm_endpoint_url: ${LLM_ENDPOINT_URL}
+      model: ${LLM_MODEL_ID}
+      temperature: ${TEMPERATURE}
+      max_new_tokens: ${MAX_TOKENS}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      WORKER_FINQA_AGENT_URL: ${WORKER_FINQA_AGENT_URL}
+      WORKER_RESEARCH_AGENT_URL: ${WORKER_RESEARCH_AGENT_URL}
+      DOCSUM_ENDPOINT: ${DOCSUM_ENDPOINT}
+      REDIS_URL_VECTOR: ${REDIS_URL_VECTOR}
+      REDIS_URL_KV: ${REDIS_URL_KV}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+  docsum-llm-textgen:  # DocSum service; publishes container port 9000
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+    container_name: docsum-llm-server
+    depends_on:
+      vllm-service:  # NOTE(review): not defined in this file; confirm the vLLM compose file is loaded too
+        condition: service_healthy
+    ports:
+      - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000"  # default matches the port in DOCSUM_ENDPOINT
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}  # same name the env script exports
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      DocSum_COMPONENT_NAME: ${DOCSUM_COMPONENT_NAME:-OpeaDocSumvLLM}  # interpolated, with a default
+      LOGFLAG: ${LOGFLAG:-False}
+    restart: unless-stopped
+
+  agent-ui:  # agent UI container; host port 5175 -> container 8080
+    image: opea/agent-ui:latest
+    container_name: agent-ui
+    environment:
+      host_ip: ${host_ip:-${HOST_IP}}  # falls back to HOST_IP when host_ip is unset or empty
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    ports:
+      - "5175:8080"
+    ipc: host
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index b9c8a8f7e0..ec1820053e 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -1,106 +1,7 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0
 
 services:
-  worker-finqa-agent:
-    image: opea/agent:latest
-    container_name: finqa-agent-endpoint
-    volumes:
-      - ${TOOLSET_PATH}:/home/user/tools/
-      - ${PROMPT_PATH}:/home/user/prompts/
-    ports:
-      - "9095:9095"
-    ipc: host
-    environment:
-      ip_address: ${ip_address}
-      strategy: react_llama
-      with_memory: false
-      recursion_limit: ${recursion_limit_worker}
-      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      llm_endpoint_url: ${LLM_ENDPOINT_URL}
-      model: ${LLM_MODEL_ID}
-      temperature: ${TEMPERATURE}
-      max_new_tokens: ${MAX_TOKENS}
-      stream: false
-      tools: /home/user/tools/finqa_agent_tools.yaml
-      custom_prompt: /home/user/prompts/finqa_prompt.py
-      require_human_feedback: false
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      REDIS_URL_VECTOR: $REDIS_URL_VECTOR
-      REDIS_URL_KV: $REDIS_URL_KV
-      TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT
-      port: 9095
-
-  worker-research-agent:
-    image: opea/agent:latest
-    container_name: research-agent-endpoint
-    volumes:
-      - ${TOOLSET_PATH}:/home/user/tools/
-      - ${PROMPT_PATH}:/home/user/prompts/
-    ports:
-      - "9096:9096"
-    ipc: host
-    environment:
-      ip_address: ${ip_address}
-      strategy: react_llama
-      with_memory: false
-      recursion_limit: 25
-      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      llm_endpoint_url: ${LLM_ENDPOINT_URL}
-      model: ${LLM_MODEL_ID}
-      stream: false
-      tools: /home/user/tools/research_agent_tools.yaml
-      custom_prompt: /home/user/prompts/research_prompt.py
-      require_human_feedback: false
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      FINNHUB_API_KEY: ${FINNHUB_API_KEY}
-      FINANCIAL_DATASETS_API_KEY: ${FINANCIAL_DATASETS_API_KEY}
-      port: 9096
-
-  supervisor-react-agent:
-    image: opea/agent:latest
-    container_name: supervisor-agent-endpoint
-    depends_on:
-      - worker-finqa-agent
-      - worker-research-agent
-    volumes:
-      - ${TOOLSET_PATH}:/home/user/tools/
-      - ${PROMPT_PATH}:/home/user/prompts/
-    ports:
-      - "9090:9090"
-    ipc: host
-    environment:
-      ip_address: ${ip_address}
-      strategy: react_llama
-      with_memory: true
-      recursion_limit: ${recursion_limit_supervisor}
-      llm_engine: vllm
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      llm_endpoint_url: ${LLM_ENDPOINT_URL}
-      model: ${LLM_MODEL_ID}
-      temperature: ${TEMPERATURE}
-      max_new_tokens: ${MAX_TOKENS}
-      stream: true
-      tools: /home/user/tools/supervisor_agent_tools.yaml
-      custom_prompt: /home/user/prompts/supervisor_prompt.py
-      require_human_feedback: false
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      WORKER_FINQA_AGENT_URL: $WORKER_FINQA_AGENT_URL
-      WORKER_RESEARCH_AGENT_URL: $WORKER_RESEARCH_AGENT_URL
-      DOCSUM_ENDPOINT: $DOCSUM_ENDPOINT
-      REDIS_URL_VECTOR: $REDIS_URL_VECTOR
-      REDIS_URL_KV: $REDIS_URL_KV
-      TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT
-      port: 9090
-
   vllm-service:
     image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
     container_name: docsum-vllm-service
@@ -136,116 +37,3 @@ services:
       - apparmor=unconfined
     command: "--model ${LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
     ipc: host
-
-  docsum-llm-textgen:
-    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
-    container_name: docsum-llm-server
-    depends_on:
-      vllm-service:
-        condition: service_healthy
-    ports:
-      - "${DOCSUM_LLM_SERVER_PORT}:9000"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      MAX_INPUT_TOKENS: ${INPUT_TOKENS}
-      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
-      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM"
-      LOGFLAG: ${LOGFLAG:-False}
-    restart: unless-stopped
-
-  agent-ui:
-    image: opea/agent-ui:latest
-    container_name: agent-ui
-    environment:
-      host_ip: ${host_ip}
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-    ports:
-      - "5175:8080"
-    ipc: host
-
-  tei-embedding-serving:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    container_name: tei-embedding-serving
-    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
-    ports:
-      - "${TEI_EMBEDDER_PORT:-10221}:80"
-    volumes:
-      - "./data:/data"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      host_ip: ${HOST_IP}
-      HF_TOKEN: ${HF_TOKEN}
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
-      interval: 10s
-      timeout: 6s
-      retries: 48
-
-  redis-vector-db:
-    image: redis/redis-stack:7.2.0-v9
-    container_name: redis-vector-db
-    ports:
-      - "${REDIS_PORT1:-6379}:6379"
-      - "${REDIS_PORT2:-8001}:8001"
-    environment:
-      - no_proxy=${no_proxy}
-      - http_proxy=${http_proxy}
-      - https_proxy=${https_proxy}
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      timeout: 10s
-      retries: 3
-      start_period: 10s
-
-  redis-kv-store:
-    image: redis/redis-stack:7.2.0-v9
-    container_name: redis-kv-store
-    ports:
-      - "${REDIS_PORT3:-6380}:6379"
-      - "${REDIS_PORT4:-8002}:8001"
-    environment:
-      - no_proxy=${no_proxy}
-      - http_proxy=${http_proxy}
-      - https_proxy=${https_proxy}
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      timeout: 10s
-      retries: 3
-      start_period: 10s
-
-  dataprep-redis-finance:
-    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-    container_name: dataprep-redis-server-finance
-    depends_on:
-      redis-vector-db:
-        condition: service_healthy
-      redis-kv-store:
-        condition: service_healthy
-      tei-embedding-serving:
-        condition: service_healthy
-    ports:
-      - "${DATAPREP_PORT:-6007}:5000"
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME}
-      REDIS_URL_VECTOR: ${REDIS_URL_VECTOR}
-      REDIS_URL_KV: ${REDIS_URL_KV}
-      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      LLM_ENDPOINT: ${LLM_ENDPOINT}
-      LLM_MODEL: ${LLM_MODEL}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
-      LOGFLAG: true
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/dataprep_compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/dataprep_compose.yaml
new file mode 100644
index 0000000000..b5eaf1f77b
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/dataprep_compose.yaml
@@ -0,0 +1,82 @@
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-serving:  # TEI embedding server used by the dataprep service
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    container_name: tei-embedding-serving
+    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
+    ports:
+      - "${TEI_EMBEDDER_PORT:-10221}:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      host_ip: ${HOST_IP}
+      HF_TOKEN: ${HF_TOKEN}
+    healthcheck:  # relies on the curl binary installed by the entrypoint above
+      test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT:-10221}/health"]  # same default as the published port
+      interval: 10s
+      timeout: 6s
+      retries: 48
+
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    ports:
+      - "${REDIS_PORT1:-6379}:6379"
+      - "${REDIS_PORT2:-8001}:8001"
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  redis-kv-store:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-kv-store
+    ports:
+      - "${REDIS_PORT3:-6380}:6379"
+      - "${REDIS_PORT4:-8002}:8001"
+    environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  dataprep-redis-finance:
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
+    container_name: dataprep-redis-server-finance
+    depends_on:
+      redis-vector-db:
+        condition: service_healthy
+      redis-kv-store:
+        condition: service_healthy
+      tei-embedding-serving:
+        condition: service_healthy
+    ports:
+      - "${DATAPREP_PORT:-6007}:5000"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME}
+      REDIS_URL_VECTOR: ${REDIS_URL_VECTOR}
+      REDIS_URL_KV: ${REDIS_URL_KV}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL: ${LLM_MODEL}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      LOGFLAG: true
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
similarity index 71%
rename from FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
rename to FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
index 86c71187bb..f3ab57099d 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
@@ -1,4 +1,8 @@
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
 export ip_address=$(hostname -I | awk '{print $1}')
+export WORKDIR=${PWD}
 export HOST_IP=${ip_address}
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
@@ -11,7 +15,7 @@ export recursion_limit_supervisor=10
 
 export vllm_port=8086
 export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}"
 export TEMPERATURE=0.5
 export MAX_TOKENS=4096
@@ -32,15 +36,4 @@ export DOCSUM_ENDPOINT="http://${ip_address}:9000/v1/docsum"
 export FINNHUB_API_KEY=${FINNHUB_API_KEY}
 export FINANCIAL_DATASETS_API_KEY=${FINANCIAL_DATASETS_API_KEY}
 
-
-export DATAPREP_PORT="6007"
-export TEI_EMBEDDER_PORT="10221"
-export REDIS_URL_VECTOR="redis://${ip_address}:6379"
-export REDIS_URL_KV="redis://${ip_address}:6380"
-export LLM_MODEL=$model
-export LLM_ENDPOINT="http://${ip_address}:${vllm_port}"
-export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
-export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}"
-
-export MAX_LEN=16384
+docker compose -f compose.yaml up -d
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_dataprep.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_dataprep.sh
new file mode 100644
index 0000000000..31762da9d3
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_dataprep.sh
@@ -0,0 +1,15 @@
+# Copyright (C) 2025 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+export host_ip=${ip_address:=$(hostname -I | awk '{print $1}')}  # ':=' also sets ip_address when the caller has not
+export DATAPREP_PORT="6007"
+export TEI_EMBEDDER_PORT="10221"
+export REDIS_URL_VECTOR="redis://${ip_address}:6379"
+export REDIS_URL_KV="redis://${ip_address}:6380"
+export LLM_MODEL=${model:-${LLM_MODEL_ID}}  # this script never sets $model; fall back to LLM_MODEL_ID
+export LLM_ENDPOINT="http://${ip_address}:${vllm_port:-8086}"  # 8086 is the vllm_port used by launch_agents.sh
+export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}"
+
+docker compose -f dataprep_compose.yaml up -d
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
new file mode 100644
index 0000000000..0e13c7f9f9
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
@@ -0,0 +1,7 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export MAX_LEN=16384
+
+docker compose -f vllm_compose.yaml up -d

From aaf7f86c8872f8bd279e00ea9809101835b9dc94 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Mon, 28 Apr 2025 17:49:57 +0700
Subject: [PATCH 11/24] adjust rocm example

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/compose_vllm.yaml            |   2 +-
 .../amd/gpu/rocm/launch_vllm.sh               |   2 +-
 FinanceAgent/docker_image_build/build.yaml    |   5 +
 .../tests/test_compose_on_vllm_rocm.sh        | 242 ++++++++++++++++++
 4 files changed, 249 insertions(+), 2 deletions(-)
 create mode 100644 FinanceAgent/tests/test_compose_on_vllm_rocm.sh

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
index ec1820053e..8fe2226d0b 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml
@@ -4,7 +4,7 @@
 services:
   vllm-service:
     image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
-    container_name: docsum-vllm-service
+    container_name: vllm-service
     ports:
       - "${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}:8011"
     environment:
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
index 0e13c7f9f9..5d8d58641b 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
@@ -1,7 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export MAX_LEN=16384
 
 docker compose -f vllm_compose.yaml up -d
diff --git a/FinanceAgent/docker_image_build/build.yaml b/FinanceAgent/docker_image_build/build.yaml
index 7d113148a3..23d1af7b76 100644
--- a/FinanceAgent/docker_image_build/build.yaml
+++ b/FinanceAgent/docker_image_build/build.yaml
@@ -20,3 +20,8 @@ services:
         https_proxy: ${https_proxy}
         no_proxy: ${no_proxy}
     image: ${REGISTRY:-opea}/agent:${TAG:-latest}
+  vllm-rocm:
+    build:
+      context: GenAIComps
+      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
+    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
new file mode 100644
index 0000000000..e361719e28
--- /dev/null
+++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
@@ -0,0 +1,242 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+set -xe
+
+export WORKPATH=$(dirname "$PWD")
+export WORKDIR=$WORKPATH/../../
+echo "WORKDIR=${WORKDIR}"
+export ip_address=$(hostname -I | awk '{print $1}')
+LOG_PATH=$WORKPATH
+
+#### env vars for LLM endpoint #############
+model=meta-llama/Llama-3.3-70B-Instruct
+vllm_image=opea/vllm-rocm:latest
+vllm_port=8086
+vllm_image=$vllm_image
+HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
+vllm_volume=${HF_CACHE_DIR}
+#######################################
+
+#### env vars for dataprep #############
+export host_ip=${ip_address}
+export DATAPREP_PORT="6007"
+export TEI_EMBEDDER_PORT="10221"
+export REDIS_URL_VECTOR="redis://${ip_address}:6379"
+export REDIS_URL_KV="redis://${ip_address}:6380"
+export LLM_MODEL=$model
+export LLM_ENDPOINT="http://${ip_address}:${vllm_port}"
+export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}"
+#######################################
+
+
+
+function get_genai_comps() {
+    if [ ! -d "GenAIComps" ] ; then
+        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
+    fi
+}
+
+function build_dataprep_agent_and_vllm_images() {
+    cd $WORKDIR/GenAIExamples/FinanceAgent/docker_image_build/
+    get_genai_comps
+    echo "Build agent image with --no-cache..."
+    docker compose -f build.yaml build --no-cache
+}
+
+function build_agent_image_local(){
+    cd $WORKDIR/GenAIComps/
+    docker build -t opea/agent:latest -f comps/agent/src/Dockerfile . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
+}
+
+function start_vllm_service {  # start the ROCm vLLM container and block until its log shows readiness
+    echo "start vllm rocm service"
+    LLM_MODEL_ID=${LLM_MODEL_ID:-$model} FINANCEAGENT_VLLM_SERVICE_PORT=${FINANCEAGENT_VLLM_SERVICE_PORT:-$vllm_port} docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/compose_vllm.yaml up -d
+    sleep 1m
+    echo "Waiting vllm rocm ready"
+    n=0  # poll counter: at most 500 polls, 10s apart
+    until [[ "$n" -ge 500 ]]; do
+        docker logs vllm-service >& "${LOG_PATH}"/vllm-service_start.log
+        if grep -q "Application startup complete" "${LOG_PATH}"/vllm-service_start.log; then
+            sleep 10s; echo "Service started successfully"; return 0
+        fi
+        sleep 10s
+        n=$((n+1))
+    done
+    # Readiness marker never appeared: fail the run instead of claiming success.
+    echo "vllm-service did not report startup in time, see ${LOG_PATH}/vllm-service_start.log"; exit 1
+}
+
+
+function stop_llm(){
+    cid=$(docker ps -aq --filter "name=vllm-service")
+    echo "Stopping container $cid"
+    if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
+
+}
+
+function start_dataprep(){
+    docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/dataprep_compose.yaml up -d
+    sleep 1m
+}
+
+function validate() {  # usage: validate CONTENT EXPECTED_RESULT SERVICE_NAME
+    local CONTENT="$1"
+    local EXPECTED_RESULT="$2"
+    local SERVICE_NAME="$3"
+    echo "EXPECTED_RESULT: $EXPECTED_RESULT"
+    echo "Content: $CONTENT"
+    if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then  # EXPECTED_RESULT is used as a grep pattern
+        echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT"
+        echo 0  # status is printed as the final output line, not returned
+    else
+        echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
+        echo 1  # callers capture stdout and inspect its last character
+    fi
+}
+
+function ingest_validate_dataprep() {
+    # test /v1/dataprep/ingest
+    echo "=========== Test ingest ==========="
+    local CONTENT=$(python3 $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option ingest)
+    local EXIT_CODE=$(validate "$CONTENT" "200" "dataprep-redis-finance")
+    echo "$EXIT_CODE"
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs dataprep-redis-server-finance
+        exit 1
+    fi
+
+    # test /v1/dataprep/get
+    echo "=========== Test get ==========="
+    local CONTENT=$(python $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get)
+    local EXIT_CODE=$(validate "$CONTENT" "Request successful" "dataprep-redis-finance")
+    echo "$EXIT_CODE"
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs dataprep-redis-server-finance
+        exit 1
+    fi
+}
+
+function stop_dataprep() {
+    echo "Stopping databases"
+    cid=$(docker ps -aq --filter "name=dataprep-redis-server*" --filter "name=redis-*" --filter "name=tei-embedding-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+
+}
+
+function start_agents() {
+    echo "Starting Agent services"
+    cd $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/amd/gpu/rocm/
+    bash launch_agents.sh
+    sleep 2m
+}
+
+
+function validate_agent_service() {
+    # # test worker finqa agent
+    echo "======================Testing worker finqa agent======================"
+    export agent_port="9095"
+    prompt="What is Gap's revenue in 2024?"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port)
+    echo $CONTENT
+    local EXIT_CODE=$(validate "$CONTENT" "15" "finqa-agent-endpoint")
+    echo $EXIT_CODE
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs finqa-agent-endpoint
+        exit 1
+    fi
+
+    # # test worker research agent
+    echo "======================Testing worker research agent======================"
+    export agent_port="9096"
+    prompt="Johnson & Johnson"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance")
+    local EXIT_CODE=$(validate "$CONTENT" "Johnson" "research-agent-endpoint")
+    echo $CONTENT
+    echo $EXIT_CODE
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+	docker logs research-agent-endpoint
+	exit 1
+    fi
+
+    # test supervisor react agent
+    echo "======================Testing supervisor agent: single turns ======================"
+    export agent_port="9090"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream)
+    echo $CONTENT
+    local EXIT_CODE=$(validate "$CONTENT" "test completed with success" "supervisor-agent-endpoint")
+    echo $EXIT_CODE
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs supervisor-agent-endpoint
+        exit 1
+    fi
+
+    # echo "======================Testing supervisor agent: multi turns ======================"
+    local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream)
+    echo $CONTENT
+    local EXIT_CODE=$(validate "$CONTENT" "test completed with success" "supervisor-agent-endpoint")
+    echo $EXIT_CODE
+    local EXIT_CODE="${EXIT_CODE:0-1}"
+    if [ "$EXIT_CODE" == "1" ]; then
+        docker logs supervisor-agent-endpoint
+        exit 1
+    fi
+
+}
+
+function stop_agent_docker() {
+    cd $WORKPATH/docker_compose/intel/hpu/gaudi/
+    container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
+    for container_name in $container_list; do
+        cid=$(docker ps -aq --filter "name=$container_name")
+        echo "Stopping container $container_name"
+        if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
+    done
+}
+
+
+echo "workpath: $WORKPATH"
+echo "=================== Stop containers ===================="
+stop_llm
+stop_agent_docker
+stop_dataprep
+
+cd $WORKPATH/tests
+
+echo "=================== #1 Building docker images===================="
+build_dataprep_agent_and_vllm_images
+
+#### for local test
+# build_agent_image_local
+# echo "=================== #1 Building docker images completed===================="
+
+echo "=================== #2 Start vllm endpoint===================="
+start_vllm_service
+echo "=================== #2 vllm endpoint started===================="
+
+echo "=================== #3 Start dataprep and ingest data ===================="
+start_dataprep
+ingest_validate_dataprep
+echo "=================== #3 Data ingestion and validation completed===================="
+
+echo "=================== #4 Start agents ===================="
+start_agents
+validate_agent_service
+echo "=================== #4 Agent test passed ===================="
+
+echo "=================== #5 Stop microservices ===================="
+stop_agent_docker
+stop_dataprep
+stop_llm
+echo "=================== #5 Microservices stopped===================="
+
+echo y | docker system prune
+
+echo "ALL DONE!!"

From b9c3b45d113e09d46412dac5add50b4bd475e2af Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Tue, 29 Apr 2025 10:22:31 +0700
Subject: [PATCH 12/24] Update test_compose_on_vllm_rocm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/tests/test_compose_on_vllm_rocm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
index e361719e28..2e8025d854 100644
--- a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
+++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
@@ -192,7 +192,7 @@ function validate_agent_service() {
 }
 
 function stop_agent_docker() {
-    cd $WORKPATH/docker_compose/intel/hpu/gaudi/
+    cd $WORKPATH/docker_compose/amd/gpu/rocm/
     container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
     for container_name in $container_list; do
         cid=$(docker ps -aq --filter "name=$container_name")

From 31371e34b584667454df0a94d013252c0af76e27 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Tue, 29 Apr 2025 11:19:01 +0700
Subject: [PATCH 13/24] Adjust example config

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml     | 3 ---
 FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh | 1 -
 FinanceAgent/tests/test_compose_on_vllm_rocm.sh           | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
index 62af173adb..ef2d84492b 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
@@ -103,9 +103,6 @@ services:
   docsum-llm-textgen:
     image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: docsum-llm-server
-    depends_on:
-      vllm-service:
-        condition: service_healthy
     ports:
       - "${DOCSUM_LLM_SERVER_PORT}:9000"
     ipc: host
diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
index f3ab57099d..6279d084cf 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 export ip_address=$(hostname -I | awk '{print $1}')
-export WORKDIR=${PWD}
 export HOST_IP=${ip_address}
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
index 2e8025d854..5b758811ec 100644
--- a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
+++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh
@@ -111,7 +111,7 @@ function ingest_validate_dataprep() {
 
     # test /v1/dataprep/get
     echo "=========== Test get ==========="
-    local CONTENT=$(python $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get)
+    local CONTENT=$(python3 $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get)
     local EXIT_CODE=$(validate "$CONTENT" "Request successful" "dataprep-redis-finance")
     echo "$EXIT_CODE"
     local EXIT_CODE="${EXIT_CODE:0-1}"

From 5176581e5af4c6c15dfe6aa16088115a38f7b9fe Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Tue, 29 Apr 2025 11:32:29 +0700
Subject: [PATCH 14/24] Update compose.yaml

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
index ef2d84492b..45803bf2b1 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml
@@ -112,7 +112,7 @@ services:
       https_proxy: ${https_proxy}
       LLM_ENDPOINT: ${LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      MAX_INPUT_TOKENS: ${INPUT_TOKENS}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM"

From 490ec138d20c55d1dc24fe5f8d0c7e8221553a1a Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Tue, 29 Apr 2025 14:47:40 +0700
Subject: [PATCH 15/24] Add README.md for AMD ROCm deployment

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/.README.md.kate-swp          | Bin 0 -> 485 bytes
 .../amd/gpu/rocm/.set_env_vllm.sh.kate-swp    | Bin 749 -> 0 bytes
 .../docker_compose/amd/gpu/rocm/README.md     | 190 ++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp
 delete mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp
 create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/README.md

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp
new file mode 100644
index 0000000000000000000000000000000000000000..40ee1ce33c1963ab860482fea09841cf81274183
GIT binary patch
literal 485
zcmYk&Jqp4=5C!1b#QbS&!~+BiOBJ!Qb_KC>5$k{{ga{&{l~`JMf!xT#+H-gX=V|d0
zNOnFZJ3Bif(pl!GB6FAgkj-~xk&Oo<5$X1ykN329uQpFxeO~+F`8M(0Ml+tiSNjWn
z4aK^Jej5n&F*Dl01&y8NL}R<jY1A+~I)F<$glqL8c*}eYXLJJB>NVjr^DVemFNKfR
zt#m~%aGS*MDT!(=5{ETO^pcQhH70SBh{Q!h5@!b_#?&A&MW4iwJyIpcy0tF4=f8Xb
DRn9Q$

literal 0
HcmV?d00001

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp
deleted file mode 100644
index 5d6d484bbdb4ba4f0504a0b5b8b09968896ecfd2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 749
zcmYk(NlF7j7>41>JeYZo>BMf#oTxDhf{LyphzMc~xNs9B3q`?&;6Xf);KoCE2Jc|M
z#V1&hm;Xu8O{HRtDO@aHSML06`Qn~EJgwa6_QV)d%9cMTkMBPp&hz5->;C)3Yc=oY
z*Z$7f|FF)%$d1PBf}wB>2Eqa83suk)CZH?qgO1>Ww$NDk!<dZ7kPJwl^hlR<X7+fV
zKeA4Fhs)%z0chFL7<;^?u*HreIH2(#b-|vsCfKu9g+rdH2=*FE!WBD3;S%J93dsFO
zRyblOBkX{*Fa;^043a_{Bm{ePalvCJCfEy!3Y+XigcA@JMj#~E;{*kJI{~50m3zb`
z8>B^=#3A;NTW6}18mW>Bu`ehw6-j~QNseSmhNMY~tdS&1kT{8vD2b3T36UT_<8STp
HO<?v5(bF~^

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
new file mode 100644
index 0000000000..26c78bd53a
--- /dev/null
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
@@ -0,0 +1,190 @@
+# Example Finance Agent deployments on AMD GPU (ROCm)
+
+This document outlines the deployment process for a Finance Agent application utilizing OPEA components on an AMD GPU server.
+
+This example includes the following sections:
+
+- [Finance Agent Quick Start Deployment](#finance-agent-quick-start-deployment): Demonstrates how to quickly deploy a Finance Agent application/pipeline on AMD GPU platform.
+- [Finance Agent Docker Compose Files](#finance-agent-docker-compose-files): Describes some example deployments and their docker compose files.
+- [How to interact with the agent system with UI](#how-to-interact-with-the-agent-system-with-ui): Guideline for UI usage
+
+## Finance Agent Quick Start Deployment
+
+This section describes how to quickly deploy and test the Finance Agent service manually on an AMD GPU platform. The basic steps are:
+
+1. [Access the Code](#access-the-code)
+2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
+3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+4. [Check the Deployment Status](#check-the-deployment-status)
+5. [Test the Pipeline](#test-the-pipeline)
+6. [Cleanup the Deployment](#cleanup-the-deployment)
+
+### Access the Code
+
+Clone the GenAIExamples repository and access the Finance Agent AMD GPU platform Docker Compose files and supporting scripts:
+
+```
+mkdir /path/to/your/workspace/
+export WORKDIR=/path/to/your/workspace/
+cd $WORKDIR
+git clone https://github.com/opea-project/GenAIExamples.git
+```
+
+Check out a released version, such as v1.4, from inside the cloned `GenAIExamples` directory:
+
+```
+git checkout v1.4
+```
+
+### Generate a HuggingFace Access Token
+
+Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+
+### Deploy the Services Using Docker Compose
+
+#### 3.1 Launch vllm endpoint
+
+Below is the command to launch a vLLM endpoint that serves the `meta-llama/Llama-3.3-70B-Instruct` model on the AMD ROCm platform.
+
+```bash
+cd $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/amd/gpu/rocm
+bash launch_vllm.sh
+```
+
+#### 3.2 Prepare knowledge base
+
+The commands below will upload some example files into the knowledge base. You can also upload files through UI.
+
+First, launch the redis databases and the dataprep microservice.
+
+```bash
+# inside $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/amd/gpu/rocm
+bash launch_dataprep.sh
+```
+
+Validate data ingestion and retrieval from the database:
+
+```bash
+python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option ingest
+python $WORKDIR/GenAIExamples/FinanceAgent/tests/test_redis_finance.py --port 6007 --test_option get
+```
+
+#### 3.3 Launch the multi-agent system
+
+The command below will launch 3 agent microservices, 1 docsum microservice, and 1 UI microservice.
+
+```bash
+# inside $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/amd/gpu/rocm
+bash launch_agents.sh
+```
+
+#### 3.4 Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```
+docker ps -a
+```
+
+For the default deployment, the following 6 containers should have started:
+
+```
+CONTAINER ID   IMAGE                                                   COMMAND                  CREATED          STATUS                 PORTS                                                                                      NAMES
+7e61978c3d75   opea/dataprep:latest                                    "sh -c 'python $( [ …"   31 seconds ago   Up 19 seconds          0.0.0.0:6007->5000/tcp, [::]:6007->5000/tcp                                                dataprep-redis-server-finance
+0fee87aca791   redis/redis-stack:7.2.0-v9                              "/entrypoint.sh"         3 hours ago      Up 3 hours (healthy)   0.0.0.0:6380->6379/tcp, [::]:6380->6379/tcp, 0.0.0.0:8002->8001/tcp, [::]:8002->8001/tcp   redis-kv-store
+debd549045f8   redis/redis-stack:7.2.0-v9                              "/entrypoint.sh"         3 hours ago      Up 3 hours (healthy)   0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp       redis-vector-db
+9cff469364d3   ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   "/bin/sh -c 'apt-get…"   3 hours ago      Up 3 hours (healthy)   0.0.0.0:10221->80/tcp, [::]:10221->80/tcp                                                  tei-embedding-serving
+13f71e678dbd   opea/vllm-rocm:latest                                   "python3 /workspace/…"   3 hours ago      Up 3 hours (healthy)   0.0.0.0:8086->8011/tcp, [::]:8086->8011/tcp                                                vllm-service
+e5a219a77c95   opea/llm-docsum:latest                                  "bash entrypoint.sh"     3 hours ago      Up 2 seconds           0.0.0.0:33218->9000/tcp, [::]:33218->9000/tcp                                              docsum-llm-server
+```
+
+#### 3.5 Validate agents
+
+FinQA Agent:
+
+```bash
+export agent_port="9095"
+prompt="What is Gap's revenue in 2024?"
+python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port
+```
+
+Research Agent:
+
+```bash
+export agent_port="9096"
+prompt="generate NVDA financial research report"
+python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance"
+```
+
+Supervisor Agent single turn:
+
+```bash
+export agent_port="9090"
+python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream
+```
+
+Supervisor Agent multi turn:
+
+```bash
+python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream
+
+```
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following commands:
+
+```
+docker compose -f compose.yaml down
+docker compose -f compose_vllm.yaml down
+docker compose -f dataprep_compose.yaml down
+```
+
+All the Finance Agent containers will be stopped and then removed on completion of the "down" command.
+
+## Finance Agent Docker Compose Files
+
+In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application.
+
+| File                                     | Description                                                                                |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------ |
+| [compose.yaml](./compose.yaml)           | Default compose to run agent service                                        |
+| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM.  |
+| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embeder  |
+
+
+## How to interact with the agent system with UI
+
+The UI microservice is launched in the previous step with the other microservices.
+To see the UI, open a web browser to `http://${ip_address}:5175` to access the UI. Note the `ip_address` here is the host IP of the UI microservice.
+
+1. Create Admin Account with a random value
+
+2. Enter the endpoints in the `Connections` settings
+
+   First, click on the user icon in the upper right corner to open `Settings`. Click on `Admin Settings`. Click on `Connections`.
+
+   Then, enter the supervisor agent endpoint in the `OpenAI API` section: `http://${ip_address}:9090/v1`. Enter the API key as "empty". Add an arbitrary model id in `Model IDs`, for example, "opea_agent". The `ip_address` here should be the host ip of the agent microservice.
+
+   Then, enter the dataprep endpoint in the `Icloud File API` section. You first need to enable `Icloud File API` by clicking the toggle on the right so that it turns green, and then enter the endpoint URL, for example, `http://${ip_address}:6007/v1`. The `ip_address` here should be the host IP of the dataprep microservice.
+
+   You should see screen like the screenshot below when the settings are done.
+
+![opea-agent-setting](assets/ui_connections_settings.png)
+
+3. Upload documents with UI
+
+   Click on the `Workplace` icon in the top left corner. Click `Knowledge`. Click on the "+" sign to the right of `Icloud Knowledge`. You can paste a URL in the left-hand side of the pop-up window, or upload a local file by clicking on the cloud icon on the right-hand side of the pop-up window. Then click on the `Upload Confirm` button. Wait until the processing is done; the pop-up window will close on its own when the data ingestion is complete. See the screenshot below.
+
+   Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window.
+
+![upload-doc-ui](assets/upload_doc_ui.png)
+
+4. Test agent with UI
+
+   After the settings are done and documents are ingested, you can start to ask questions to the agent. Click on the `New Chat` icon in the top left corner, and type in your questions in the text box in the middle of the UI.
+
+   The UI will stream the agent's response tokens. You need to expand the `Thinking` tab to see the agent's reasoning process. After the agent made tool calls, you would also see the tool output after the tool returns output to the agent. Note: it may take a while to get the tool output back if the tool execution takes time.
+
+![opea-agent-test](assets/opea-agent-test.png)

From 6569374ffd024c4918ecc016cc3e7a264510551c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 29 Apr 2025 07:48:19 +0000
Subject: [PATCH 16/24] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../amd/gpu/rocm/.README.md.kate-swp            | Bin 485 -> 486 bytes
 .../docker_compose/amd/gpu/rocm/README.md       |  12 +++++-------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp
index 40ee1ce33c1963ab860482fea09841cf81274183..f22ccb77c3c004851b9d182ad859421e95e641c4 100644
GIT binary patch
delta 15
WcmaFL{ET^nA|oT$WF^MKj9dUI90W4}

delta 13
UcmaFH{FHfvA|oU3WF^MK03gu>D*ylh

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
index 26c78bd53a..2ad2716daf 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
@@ -40,7 +40,6 @@ git checkout v1.4
 
 Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
 
-
 ### Deploy the Services Using Docker Compose
 
 #### 3.1 Launch vllm endpoint
@@ -147,12 +146,11 @@ All the Finance Agent containers will be stopped and then removed on completion
 
 In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application.
 
-| File                                     | Description                                                                                |
-| ---------------------------------------- | ------------------------------------------------------------------------------------------ |
-| [compose.yaml](./compose.yaml)           | Default compose to run agent service                                        |
-| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM.  |
-| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embeder  |
-
+| File                                             | Description                                                                          |
+| ------------------------------------------------ | ------------------------------------------------------------------------------------ |
+| [compose.yaml](./compose.yaml)                   | Default compose to run agent service                                                 |
+| [compose_vllm.yaml](./compose_vllm.yaml)         | The LLM Service serving framework is vLLM.                                           |
+| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embedder |
 
 ## How to interact with the agent system with UI
 

From 77a8e85cd53f3753cd0fb202df54cffb04b2136d Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Tue, 29 Apr 2025 14:57:17 +0700
Subject: [PATCH 17/24] Update README.md for AMD ROCm

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 .../amd/gpu/rocm/.README.md.kate-swp              | Bin 485 -> 0 bytes
 .../docker_compose/amd/gpu/rocm/README.md         |   6 +++---
 2 files changed, 3 insertions(+), 3 deletions(-)
 delete mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp
deleted file mode 100644
index 40ee1ce33c1963ab860482fea09841cf81274183..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 485
zcmYk&Jqp4=5C!1b#QbS&!~+BiOBJ!Qb_KC>5$k{{ga{&{l~`JMf!xT#+H-gX=V|d0
zNOnFZJ3Bif(pl!GB6FAgkj-~xk&Oo<5$X1ykN329uQpFxeO~+F`8M(0Ml+tiSNjWn
z4aK^Jej5n&F*Dl01&y8NL}R<jY1A+~I)F<$glqL8c*}eYXLJJB>NVjr^DVemFNKfR
zt#m~%aGS*MDT!(=5{ETO^pcQhH70SBh{Q!h5@!b_#?&A&MW4iwJyIpcy0tF4=f8Xb
DRn9Q$

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
index 26c78bd53a..0560d17711 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
@@ -171,7 +171,7 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U
 
    You should see screen like the screenshot below when the settings are done.
 
-![opea-agent-setting](assets/ui_connections_settings.png)
+![opea-agent-setting](../../../../assets/ui_connections_settings.png)
 
 3. Upload documents with UI
 
@@ -179,7 +179,7 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U
 
    Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window.
 
-![upload-doc-ui](assets/upload_doc_ui.png)
+![upload-doc-ui](../../../../assets/upload_doc_ui.png)
 
 4. Test agent with UI
 
@@ -187,4 +187,4 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U
 
    The UI will stream the agent's response tokens. You need to expand the `Thinking` tab to see the agent's reasoning process. After the agent made tool calls, you would also see the tool output after the tool returns output to the agent. Note: it may take a while to get the tool output back if the tool execution takes time.
 
-![opea-agent-test](assets/opea-agent-test.png)
+![opea-agent-test](../../../../assets/opea-agent-test.png)

From ec645480b87db862247c419f698b3980324e7c0b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 29 Apr 2025 08:00:36 +0000
Subject: [PATCH 18/24] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 FinanceAgent/docker_compose/amd/gpu/rocm/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
index 816bc3cd43..277d3f02ac 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md
@@ -146,10 +146,10 @@ All the Finance Agent containers will be stopped and then removed on completion
 
 In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application.
 
-| File                                             | Description                                                                          |
-| ------------------------------------------------ | ------------------------------------------------------------------------------------ |
-| [compose.yaml](./compose.yaml)                   | Default compose to run agent service                                                 |
-| [compose_vllm.yaml](./compose_vllm.yaml)         | The LLM Service serving framework is vLLM.                                           |
+| File                                             | Description                                                                           |
+| ------------------------------------------------ | ------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml)                   | Default compose to run agent service                                                  |
+| [compose_vllm.yaml](./compose_vllm.yaml)         | The LLM Service serving framework is vLLM.                                            |
 | [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embedder |
 
 ## How to interact with the agent system with UI

From de7c65b9d09fe8169527b363376a8e1b24df0a74 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Wed, 14 May 2025 14:03:00 +0700
Subject: [PATCH 19/24] Rename tests file for AMD ROCm

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 ...{test_compose_on_vllm_rocm.sh => test_compose_vllm_on_rocm.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename FinanceAgent/tests/{test_compose_on_vllm_rocm.sh => test_compose_vllm_on_rocm.sh} (100%)

diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
similarity index 100%
rename from FinanceAgent/tests/test_compose_on_vllm_rocm.sh
rename to FinanceAgent/tests/test_compose_vllm_on_rocm.sh

From eafda509e1c57b26e28ca769d27bc1a9c19700b6 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 16 May 2025 11:20:18 +0700
Subject: [PATCH 20/24] Update launch_vllm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
index 5d8d58641b..9d4100ec26 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
@@ -1,7 +1,9 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
-export MAX_LEN=16384
+#export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+#export MAX_LEN=16384
+export MAX_LEN=8192
 
 docker compose -f vllm_compose.yaml up -d

From b690eeb5ce9a8edcd6470547022e0cec38a15a02 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 16 May 2025 13:00:46 +0700
Subject: [PATCH 21/24] Update launch_vllm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
index 9d4100ec26..a5c9597751 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
@@ -3,7 +3,7 @@
 
 #export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
-#export MAX_LEN=16384
-export MAX_LEN=8192
+export MAX_LEN=16384
+#export MAX_LEN=8192
 
 docker compose -f vllm_compose.yaml up -d

From 92d160b23e3102bf6c21cccecd572872d656ec37 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 16 May 2025 13:20:55 +0700
Subject: [PATCH 22/24] Adjust tests

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 6 +++---
 FinanceAgent/tests/test_compose_vllm_on_rocm.sh         | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
index a5c9597751..638660d7fb 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh
@@ -1,9 +1,9 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-#export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+#export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export MAX_LEN=16384
-#export MAX_LEN=8192
+
 
 docker compose -f vllm_compose.yaml up -d
diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
index 5b758811ec..076d464355 100644
--- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
+++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
@@ -11,6 +11,8 @@ LOG_PATH=$WORKPATH
 
 #### env vars for LLM endpoint #############
 model=meta-llama/Llama-3.3-70B-Instruct
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export MAX_LEN=16384
 vllm_image=opea/vllm-rocm:latest
 vllm_port=8086
 vllm_image=$vllm_image
@@ -52,7 +54,7 @@ function build_agent_image_local(){
 }
 
 function start_vllm_service {
-    echo "start vllm gaudi service"
+    echo "start vllm service"
     docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/compose_vllm.yaml up -d
     sleep 1m
     echo "Waiting vllm rocm ready"

From 9d79858e6b38a4f3704625460efca004e5260d29 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 16 May 2025 17:08:39 +0700
Subject: [PATCH 23/24] Update test_compose_vllm_on_rocm.sh

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/tests/test_compose_vllm_on_rocm.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
index 076d464355..cfa461e2eb 100644
--- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
+++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
@@ -11,12 +11,12 @@ LOG_PATH=$WORKPATH
 
 #### env vars for LLM endpoint #############
 model=meta-llama/Llama-3.3-70B-Instruct
-export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
+export LLM_MODEL_ID=$model
 export MAX_LEN=16384
 vllm_image=opea/vllm-rocm:latest
-vllm_port=8086
+vllm_port=8081
 vllm_image=$vllm_image
-HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
+HF_CACHE_DIR=${model_cache:-"./data"}
 vllm_volume=${HF_CACHE_DIR}
 #######################################
 

From e4ed752deb9c1e9066e7ef66faf7e1e38b158f35 Mon Sep 17 00:00:00 2001
From: Artem Astafev <a.astafev@datamonsters.com>
Date: Fri, 16 May 2025 18:07:34 +0700
Subject: [PATCH 24/24] Fix tests

Signed-off-by: Artem Astafev <a.astafev@datamonsters.com>
---
 FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh | 2 +-
 FinanceAgent/tests/test_compose_vllm_on_rocm.sh           | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
index 6279d084cf..db3ec09b99 100644
--- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
+++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh
@@ -14,7 +14,7 @@ export recursion_limit_supervisor=10
 
 export vllm_port=8086
 export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port}
-export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
+export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct"
 export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}"
 export TEMPERATURE=0.5
 export MAX_TOKENS=4096
diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
index cfa461e2eb..01131449a9 100644
--- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
+++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh
@@ -14,7 +14,8 @@ model=meta-llama/Llama-3.3-70B-Instruct
 export LLM_MODEL_ID=$model
 export MAX_LEN=16384
 vllm_image=opea/vllm-rocm:latest
-vllm_port=8081
+vllm_port=8086
+export FINANCEAGENT_VLLM_SERVICE_PORT=$vllm_port
 vllm_image=$vllm_image
 HF_CACHE_DIR=${model_cache:-"./data"}
 vllm_volume=${HF_CACHE_DIR}
@@ -163,8 +164,8 @@ function validate_agent_service() {
     echo $EXIT_CODE
     local EXIT_CODE="${EXIT_CODE:0-1}"
     if [ "$EXIT_CODE" == "1" ]; then
-	docker logs research-agent-endpoint
-	exit 1
+	  docker logs research-agent-endpoint
+	  exit 1
     fi
 
     # test supervisor react agent