From fd3824d73aa52928d48148b4496cf506e486df1b Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 14:19:23 +0700 Subject: [PATCH 01/24] Add Config for vLLM Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/compose_vllm.yaml | 265 ++++++++++++++++++ .../amd/gpu/rocm/set_env_vllm.sh | 44 +++ 2 files changed, 309 insertions(+) create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml new file mode 100644 index 0000000000..5e5c069223 --- /dev/null +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -0,0 +1,265 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + worker-finqa-agent: + image: opea/agent:latest + container_name: finqa-agent-endpoint + volumes: + - ${TOOLSET_PATH}:/home/user/tools/ + - ${PROMPT_PATH}:/home/user/prompts/ + ports: + - "9095:9095" + ipc: host + environment: + ip_address: ${ip_address} + strategy: react_llama + with_memory: false + recursion_limit: ${recursion_limit_worker} + llm_engine: vllm + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT_URL} + model: ${LLM_MODEL_ID} + temperature: ${TEMPERATURE} + max_new_tokens: ${MAX_TOKENS} + stream: false + tools: /home/user/tools/finqa_agent_tools.yaml + custom_prompt: /home/user/prompts/finqa_prompt.py + require_human_feedback: false + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL_VECTOR: $REDIS_URL_VECTOR + REDIS_URL_KV: $REDIS_URL_KV + TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT + port: 9095 + + worker-research-agent: + image: opea/agent:latest + container_name: research-agent-endpoint + volumes: + - ${TOOLSET_PATH}:/home/user/tools/ + - ${PROMPT_PATH}:/home/user/prompts/ + ports: + - "9096:9096" + ipc: host + environment: + ip_address: ${ip_address} + strategy: react_llama + with_memory: false + recursion_limit: 25 + llm_engine: vllm + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT_URL} + model: ${LLM_MODEL_ID} + stream: false + tools: /home/user/tools/research_agent_tools.yaml + custom_prompt: /home/user/prompts/research_prompt.py + require_human_feedback: false + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + FINNHUB_API_KEY: ${FINNHUB_API_KEY} + FINANCIAL_DATASETS_API_KEY: ${FINANCIAL_DATASETS_API_KEY} + port: 9096 + + supervisor-react-agent: + image: opea/agent:latest + container_name: supervisor-agent-endpoint + depends_on: + - worker-finqa-agent + - worker-research-agent + volumes: + - ${TOOLSET_PATH}:/home/user/tools/ + - ${PROMPT_PATH}:/home/user/prompts/ + ports: + - "9090:9090" + ipc: host + environment: + ip_address: ${ip_address} + strategy: react_llama + with_memory: true + recursion_limit: ${recursion_limit_supervisor} + llm_engine: vllm + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT_URL} + model: ${LLM_MODEL_ID} + temperature: ${TEMPERATURE} + max_new_tokens: ${MAX_TOKENS} + stream: true + tools: /home/user/tools/supervisor_agent_tools.yaml + custom_prompt: /home/user/prompts/supervisor_prompt.py + require_human_feedback: false + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + WORKER_FINQA_AGENT_URL: $WORKER_FINQA_AGENT_URL + WORKER_RESEARCH_AGENT_URL: $WORKER_RESEARCH_AGENT_URL + DOCSUM_ENDPOINT: $DOCSUM_ENDPOINT + REDIS_URL_VECTOR: $REDIS_URL_VECTOR + REDIS_URL_KV: $REDIS_URL_KV + TEI_EMBEDDING_ENDPOINT: $TEI_EMBEDDING_ENDPOINT + port: 9090 + + vllm-service: + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + VLLM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + healthcheck: + test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] + interval: 10s + timeout: 10s + retries: 100 + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-textgen: + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + VLLM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + healthcheck: + test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] + interval: 10s + timeout: 10s + retries: 100 + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + agent-ui: + image: opea/agent-ui:latest + container_name: agent-ui + environment: + host_ip: ${host_ip} + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "5175:8080" + ipc: host + + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-10221}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_PORT1:-6379}:6379" + - "${REDIS_PORT2:-8001}:8001" + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + healthcheck: + test: ["CMD", "redis-cli", "ping"] + timeout: 10s + retries: 3 + start_period: 10s + + redis-kv-store: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-kv-store + ports: + - "${REDIS_PORT3:-6380}:6379" + - "${REDIS_PORT4:-8002}:8001" + environment: + - no_proxy=${no_proxy} + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + healthcheck: + test: ["CMD", "redis-cli", "ping"] + timeout: 10s + retries: 3 + start_period: 10s + + dataprep-redis-finance: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server-finance + depends_on: + redis-vector-db: + condition: service_healthy + redis-kv-store: + condition: service_healthy + tei-embedding-serving: + condition: service_healthy + ports: + - "${DATAPREP_PORT:-6007}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + DATAPREP_COMPONENT_NAME: ${DATAPREP_COMPONENT_NAME} + REDIS_URL_VECTOR: ${REDIS_URL_VECTOR} + REDIS_URL_KV: ${REDIS_URL_KV} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL: ${LLM_MODEL} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: true diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh new file mode 100644 index 0000000000..acb0a484fb --- /dev/null +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -0,0 +1,44 @@ +export ip_address=$(hostname -I | awk '{print $1}') +export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ +echo "TOOLSET_PATH=${TOOLSET_PATH}" +export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ +echo "PROMPT_PATH=${PROMPT_PATH}" +export recursion_limit_worker=12 +export recursion_limit_supervisor=10 + +vllm_port=8086 +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}" +export TEMPERATURE=0.5 +export MAX_TOKENS=4096 + +export WORKER_FINQA_AGENT_URL="http://${ip_address}:9095/v1/chat/completions" +export WORKER_RESEARCH_AGENT_URL="http://${ip_address}:9096/v1/chat/completions" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:10221" +export REDIS_URL_VECTOR="redis://${ip_address}:6379" +export REDIS_URL_KV="redis://${ip_address}:6380" + +export MAX_INPUT_TOKENS=2048 +export MAX_TOTAL_TOKENS=4096 +export DOCSUM_COMPONENT_NAME="OpeaDocSumvLLM" +export DOCSUM_ENDPOINT="http://${ip_address}:9000/v1/docsum" + +export FINNHUB_API_KEY=${FINNHUB_API_KEY} +export FINANCIAL_DATASETS_API_KEY=${FINANCIAL_DATASETS_API_KEY} + + +export DATAPREP_PORT="6007" +export TEI_EMBEDDER_PORT="10221" +export REDIS_URL_VECTOR="redis://${ip_address}:6379" +export REDIS_URL_KV="redis://${ip_address}:6380" +export LLM_MODEL=$model +export LLM_ENDPOINT="http://${ip_address}:${vllm_port}" +export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" + +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export MAX_LEN=16384 From ee743837b943e8a678dae37ec9f4e4b009011b01 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 14:38:45 +0700 Subject: [PATCH 02/24] Update compose_vllm.yaml Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/compose_vllm.yaml | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 5e5c069223..061262dbbb 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -138,40 +138,26 @@ services: ipc: host docsum-llm-textgen: - image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + docsum-vllm-service: + condition: service_healthy ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + - "${DOCSUM_LLM_SERVER_PORT}:9000" + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - VLLM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - healthcheck: - test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] - interval: 10s - timeout: 10s - retries: 100 - volumes: - - "${MODEL_CACHE:-./data}:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM" + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped agent-ui: image: opea/agent-ui:latest From 6c50388faccb7d54a3bdcd44f95d5eeb2fa3dac7 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 14:40:59 +0700 Subject: [PATCH 03/24] Update compose_vllm.yaml Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 061262dbbb..40e43171d1 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -141,7 +141,7 @@ services: image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: docsum-llm-server depends_on: - docsum-vllm-service: + vllm-service: condition: service_healthy ports: - "${DOCSUM_LLM_SERVER_PORT}:9000" From f762e43bd82bb8b416561bc2135f595f0049de06 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 14:52:23 +0700 Subject: [PATCH 04/24] Update example config Signed-off-by: Artem Astafev --- .../docker_compose/amd/gpu/rocm/compose_vllm.yaml | 8 ++++---- FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 40e43171d1..180e5923a0 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -110,14 +110,14 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 VLLM_USE_TRITON_FLASH_ATTENTION: 0 PYTORCH_JIT: 0 healthcheck: - test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] + test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] interval: 10s timeout: 10s retries: 100 @@ -134,7 +134,7 @@ services: security_opt: - seccomp:unconfined - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + command: "--model ${LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" ipc: host docsum-llm-textgen: diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh index acb0a484fb..0b7ff0aa51 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -7,7 +7,8 @@ echo "PROMPT_PATH=${PROMPT_PATH}" export recursion_limit_worker=12 export recursion_limit_supervisor=10 -vllm_port=8086 +export vllm_port=8086 +export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port} export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}" export TEMPERATURE=0.5 From 277a698670db07a67271d682859eb71d536c09f2 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 14:55:46 +0700 Subject: [PATCH 05/24] Update set_env_vllm.sh Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 0b7ff0aa51..82ccff1caf 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -1,5 +1,5 @@ export ip_address=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ echo "TOOLSET_PATH=${TOOLSET_PATH}" export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ From c5e3ab29886ea5b2916b4b26ab53df0dfd0989cb Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 15:08:41 +0700 Subject: [PATCH 06/24] Update set_env_vllm.sh Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 82ccff1caf..3d0b258c0f 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -1,5 +1,6 @@ export ip_address=$(hostname -I | awk '{print $1}') export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ echo "TOOLSET_PATH=${TOOLSET_PATH}" export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/ From 14c2fbbea10c6cf3c51d45f5295622d9a1903d12 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 15:39:26 +0700 Subject: [PATCH 07/24] Update set_env_vllm.sh Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh index 3d0b258c0f..86c71187bb 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -1,4 +1,5 @@ export ip_address=$(hostname -I | awk '{print $1}') +export HOST_IP=${ip_address} export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/ @@ -10,7 +11,7 @@ export recursion_limit_supervisor=10 export vllm_port=8086 export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port} -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}" export TEMPERATURE=0.5 export MAX_TOKENS=4096 @@ -42,5 +43,4 @@ export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE" export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export MAX_LEN=16384 From a4f154fce4a5e909b023d65d25a72057cf192f5f Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 15:40:51 +0700 Subject: [PATCH 08/24] Update compose_vllm.yaml Signed-off-by: Artem Astafev --- .../docker_compose/amd/gpu/rocm/compose_vllm.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 180e5923a0..0d09789f73 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -150,11 +150,11 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LLM_ENDPOINT: ${LLM_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + MAX_INPUT_TOKENS: ${INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + LLM_MODEL_ID: ${LLM_MODEL_ID} DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM" LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped From 8cf40257bb9b964ce88f5b58b6f2a5a53838daf0 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 25 Apr 2025 15:52:13 +0700 Subject: [PATCH 09/24] Update compose_vllm.yaml Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 0d09789f73..b9c8a8f7e0 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -184,7 +184,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - host_ip: ${host_ip} + host_ip: ${HOST_IP} HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] From b38ec3e03d6d1ff7c60aec670546610d0e9c8c3a Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Mon, 28 Apr 2025 15:08:32 +0700 Subject: [PATCH 10/24] Refactor FinanceAgent for rocm Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/.set_env_vllm.sh.kate-swp | Bin 0 -> 749 bytes .../docker_compose/amd/gpu/rocm/compose.yaml | 135 +++++++++++ .../amd/gpu/rocm/compose_vllm.yaml | 214 +----------------- .../amd/gpu/rocm/dataprep_compose.yaml | 82 +++++++ .../{set_env_vllm.sh => launch_agents.sh} | 19 +- .../amd/gpu/rocm/launch_dataprep.sh | 15 ++ .../amd/gpu/rocm/launch_vllm.sh | 7 + 7 files changed, 246 insertions(+), 226 deletions(-) create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/dataprep_compose.yaml rename FinanceAgent/docker_compose/amd/gpu/rocm/{set_env_vllm.sh => launch_agents.sh} (71%) create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/launch_dataprep.sh create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp new file mode 100644 index 0000000000000000000000000000000000000000..5d6d484bbdb4ba4f0504a0b5b8b09968896ecfd2 GIT binary patch literal 749 zcmYk(NlF7j7>41>JeYZo>BMf#oTxDhf{LyphzMc~xNs9B3q`?&;6Xf);KoCE2Jc|M z#V1&hm;Xu8O{HRtDO@aHSML06`Qn~EJgwa6_QV)d%9cMTkMBPp&hz5->;C)3Yc=oY z*Z$7f|FF)%$d1PBf}wB>2Eqa83suk)CZH?qgO1>Ww$NDk!B^=#3A;NTW6}18mW>Bu`ehw6-j~QNseSmhNMY~tdS&1kT{8vD2b3T36UT_<8STp HO Date: Mon, 28 Apr 2025 17:49:57 +0700 Subject: [PATCH 11/24] adjust rocm example Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/compose_vllm.yaml | 2 +- .../amd/gpu/rocm/launch_vllm.sh | 2 +- FinanceAgent/docker_image_build/build.yaml | 5 + .../tests/test_compose_on_vllm_rocm.sh | 242 ++++++++++++++++++ 4 files changed, 249 insertions(+), 2 deletions(-) create mode 100644 FinanceAgent/tests/test_compose_on_vllm_rocm.sh diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml index ec1820053e..8fe2226d0b 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -4,7 +4,7 @@ services: vllm-service: image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service + container_name: vllm-service ports: - "${FINANCEAGENT_VLLM_SERVICE_PORT:-8081}:8011" environment: diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh index 0e13c7f9f9..5d8d58641b 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh @@ -1,7 +1,7 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export MAX_LEN=16384 docker compose -f vllm_compose.yaml up -d diff --git a/FinanceAgent/docker_image_build/build.yaml b/FinanceAgent/docker_image_build/build.yaml index 7d113148a3..23d1af7b76 100644 --- a/FinanceAgent/docker_image_build/build.yaml +++ b/FinanceAgent/docker_image_build/build.yaml @@ -20,3 +20,8 @@ services: https_proxy: ${https_proxy} no_proxy: ${no_proxy} image: ${REGISTRY:-opea}/agent:${TAG:-latest} + vllm-rocm: + build: + context: GenAIComps + dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh new file mode 100644 index 0000000000..e361719e28 --- /dev/null +++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh @@ -0,0 +1,242 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +set -xe + +export WORKPATH=$(dirname "$PWD") +export WORKDIR=$WORKPATH/../../ +echo "WORKDIR=${WORKDIR}" +export ip_address=$(hostname -I | awk '{print $1}') +LOG_PATH=$WORKPATH + +#### env vars for LLM endpoint ############# +model=meta-llama/Llama-3.3-70B-Instruct +vllm_image=opea/vllm-rocm:latest +vllm_port=8086 +vllm_image=$vllm_image +HF_CACHE_DIR=${model_cache:-"/data2/huggingface"} +vllm_volume=${HF_CACHE_DIR} +####################################### + +#### env vars for dataprep ############# +export host_ip=${ip_address} +export DATAPREP_PORT="6007" +export TEI_EMBEDDER_PORT="10221" +export REDIS_URL_VECTOR="redis://${ip_address}:6379" +export REDIS_URL_KV="redis://${ip_address}:6380" +export LLM_MODEL=$model +export LLM_ENDPOINT="http://${ip_address}:${vllm_port}" +export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${TEI_EMBEDDER_PORT}" +####################################### + + + +function get_genai_comps() { + if [ ! -d "GenAIComps" ] ; then + git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git + fi +} + +function build_dataprep_agent_and_vllm_images() { + cd $WORKDIR/GenAIExamples/FinanceAgent/docker_image_build/ + get_genai_comps + echo "Build agent image with --no-cache..." + docker compose -f build.yaml build --no-cache +} + +function build_agent_image_local(){ + cd $WORKDIR/GenAIComps/ + docker build -t opea/agent:latest -f comps/agent/src/Dockerfile . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy +} + +function start_vllm_service { + echo "start vllm gaudi service" + docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/compose_vllm.yaml up -d + sleep 1m + echo "Waiting vllm rocm ready" + n=0 + until [[ "$n" -ge 500 ]]; do + docker logs vllm-service >& "${LOG_PATH}"/vllm-service_start.log + if grep -q "Application startup complete" "${LOG_PATH}"/vllm-service_start.log; then + break + fi + sleep 10s + n=$((n+1)) + done + sleep 10s + echo "Service started successfully" +} + + +function stop_llm(){ + cid=$(docker ps -aq --filter "name=vllm-service") + echo "Stopping container $cid" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + +} + +function start_dataprep(){ + docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/dataprep_compose.yaml up -d + sleep 1m +} + +function validate() { + local CONTENT="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + echo "EXPECTED_RESULT: $EXPECTED_RESULT" + echo "Content: $CONTENT" + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT" + echo 0 + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo 1 + fi +} + +function ingest_validate_dataprep() { + # test /v1/dataprep/ingest + echo "=========== Test ingest ===========" + local CONTENT=$(python3 $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option ingest) + local EXIT_CODE=$(validate "$CONTENT" "200" "dataprep-redis-finance") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs dataprep-redis-server-finance + exit 1 + fi + + # test /v1/dataprep/get + echo "=========== Test get ===========" + local CONTENT=$(python $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get) + local EXIT_CODE=$(validate "$CONTENT" "Request successful" "dataprep-redis-finance") + echo "$EXIT_CODE" + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs dataprep-redis-server-finance + exit 1 + fi +} + +function stop_dataprep() { + echo "Stopping databases" + cid=$(docker ps -aq --filter "name=dataprep-redis-server*" --filter "name=redis-*" --filter "name=tei-embedding-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + +} + +function start_agents() { + echo "Starting Agent services" + cd $WORKDIR/GenAIExamples/FinanceAgent/docker_compose/amd/gpu/rocm/ + bash launch_agents.sh + sleep 2m +} + + +function validate_agent_service() { + # # test worker finqa agent + echo "======================Testing worker finqa agent======================" + export agent_port="9095" + prompt="What is Gap's revenue in 2024?" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port) + echo $CONTENT + local EXIT_CODE=$(validate "$CONTENT" "15" "finqa-agent-endpoint") + echo $EXIT_CODE + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs finqa-agent-endpoint + exit 1 + fi + + # # test worker research agent + echo "======================Testing worker research agent======================" + export agent_port="9096" + prompt="Johnson & Johnson" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance") + local EXIT_CODE=$(validate "$CONTENT" "Johnson" "research-agent-endpoint") + echo $CONTENT + echo $EXIT_CODE + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs research-agent-endpoint + exit 1 + fi + + # test supervisor react agent + echo "======================Testing supervisor agent: single turns ======================" + export agent_port="9090" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream) + echo $CONTENT + local EXIT_CODE=$(validate "$CONTENT" "test completed with success" "supervisor-agent-endpoint") + echo $EXIT_CODE + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs supervisor-agent-endpoint + exit 1 + fi + + # echo "======================Testing supervisor agent: multi turns ======================" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream) + echo $CONTENT + local EXIT_CODE=$(validate "$CONTENT" "test completed with success" "supervisor-agent-endpoint") + echo $EXIT_CODE + local EXIT_CODE="${EXIT_CODE:0-1}" + if [ "$EXIT_CODE" == "1" ]; then + docker logs supervisor-agent-endpoint + exit 1 + fi + +} + +function stop_agent_docker() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi/ + container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + echo "Stopping container $container_name" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + done +} + + +echo "workpath: $WORKPATH" +echo "=================== Stop containers ====================" +stop_llm +stop_agent_docker +stop_dataprep + +cd $WORKPATH/tests + +echo "=================== #1 Building docker images====================" +build_dataprep_agent_and_vllm_images + +#### for local test +# build_agent_image_local +# echo "=================== #1 Building docker images completed====================" + +echo "=================== #2 Start vllm endpoint====================" +start_vllm_service +echo "=================== #2 vllm endpoint started====================" + +echo "=================== #3 Start dataprep and ingest data ====================" +start_dataprep +ingest_validate_dataprep +echo "=================== #3 Data ingestion and validation completed====================" + +echo "=================== #4 Start agents ====================" +start_agents +validate_agent_service +echo "=================== #4 Agent test passed ====================" + +echo "=================== #5 Stop microservices ====================" +stop_agent_docker +stop_dataprep +stop_llm +echo "=================== #5 Microservices stopped====================" + +echo y | docker system prune + +echo "ALL DONE!!" From b9c3b45d113e09d46412dac5add50b4bd475e2af Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 29 Apr 2025 10:22:31 +0700 Subject: [PATCH 12/24] Update test_compose_on_vllm_rocm.sh Signed-off-by: Artem Astafev --- FinanceAgent/tests/test_compose_on_vllm_rocm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh index e361719e28..2e8025d854 100644 --- a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh +++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh @@ -192,7 +192,7 @@ function validate_agent_service() { } function stop_agent_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi/ + cd $WORKPATH/docker_compose/amd/gpu/rocm/ container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) for container_name in $container_list; do cid=$(docker ps -aq --filter "name=$container_name") From 31371e34b584667454df0a94d013252c0af76e27 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 29 Apr 2025 11:19:01 +0700 Subject: [PATCH 13/24] Adjust example config Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml | 3 --- FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh | 1 - FinanceAgent/tests/test_compose_on_vllm_rocm.sh | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml index 62af173adb..ef2d84492b 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml @@ -103,9 +103,6 @@ services: docsum-llm-textgen: image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} container_name: docsum-llm-server - depends_on: - vllm-service: - condition: service_healthy ports: - "${DOCSUM_LLM_SERVER_PORT}:9000" ipc: host diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh index f3ab57099d..6279d084cf 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 export ip_address=$(hostname -I | awk '{print $1}') -export WORKDIR=${PWD} export HOST_IP=${ip_address} export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh index 2e8025d854..5b758811ec 100644 --- a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh +++ b/FinanceAgent/tests/test_compose_on_vllm_rocm.sh @@ -111,7 +111,7 @@ function ingest_validate_dataprep() { # test /v1/dataprep/get echo "=========== Test get ===========" - local CONTENT=$(python $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get) + local CONTENT=$(python3 $WORKPATH/tests/test_redis_finance.py --port $DATAPREP_PORT --test_option get) local EXIT_CODE=$(validate "$CONTENT" "Request successful" "dataprep-redis-finance") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" From 5176581e5af4c6c15dfe6aa16088115a38f7b9fe Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 29 Apr 2025 11:32:29 +0700 Subject: [PATCH 14/24] Update compose.yaml Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml index ef2d84492b..45803bf2b1 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/compose.yaml @@ -112,7 +112,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - MAX_INPUT_TOKENS: ${INPUT_TOKENS} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${LLM_MODEL_ID} DocSum_COMPONENT_NAME: "DocSum_COMPONENT_NAME:-OpeaDocSumvLLM" From 490ec138d20c55d1dc24fe5f8d0c7e8221553a1a Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 29 Apr 2025 14:47:40 +0700 Subject: [PATCH 15/24] Add README.md for AMD ROCm deployment Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/.README.md.kate-swp | Bin 0 -> 485 bytes .../amd/gpu/rocm/.set_env_vllm.sh.kate-swp | Bin 749 -> 0 bytes .../docker_compose/amd/gpu/rocm/README.md | 190 ++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp delete mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp create mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/README.md diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp new file mode 100644 index 0000000000000000000000000000000000000000..40ee1ce33c1963ab860482fea09841cf81274183 GIT binary patch literal 485 zcmYk&Jqp4=5C!1b#QbS&!~+BiOBJ!Qb_KC>5$k{{ga{&{l~`JMf!xT#+H-gX=V|d0 zNOnFZJ3Bif(pl!GB6FAgkj-~xk&Oo<5$X1ykN329uQpFxeO~+F`8M(0Ml+tiSNjWn z4aK^Jej5n&F*Dl01&y8NL}RNVjr^DVemFNKfR zt#m~%aGS*MDT!(=5{ETO^pcQhH70SBh{Q!h5@!b_#?&A&MW4iwJyIpcy0tF4=f8Xb DRn9Q$ literal 0 HcmV?d00001 diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.set_env_vllm.sh.kate-swp deleted file mode 100644 index 5d6d484bbdb4ba4f0504a0b5b8b09968896ecfd2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 749 zcmYk(NlF7j7>41>JeYZo>BMf#oTxDhf{LyphzMc~xNs9B3q`?&;6Xf);KoCE2Jc|M z#V1&hm;Xu8O{HRtDO@aHSML06`Qn~EJgwa6_QV)d%9cMTkMBPp&hz5->;C)3Yc=oY z*Z$7f|FF)%$d1PBf}wB>2Eqa83suk)CZH?qgO1>Ww$NDk!B^=#3A;NTW6}18mW>Bu`ehw6-j~QNseSmhNMY~tdS&1kT{8vD2b3T36UT_<8STp HO5000/tcp, [::]:6007->5000/tcp dataprep-redis-server-finance +0fee87aca791 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 3 hours ago Up 3 hours (healthy) 0.0.0.0:6380->6379/tcp, [::]:6380->6379/tcp, 0.0.0.0:8002->8001/tcp, [::]:8002->8001/tcp redis-kv-store +debd549045f8 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 3 hours ago Up 3 hours (healthy) 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db +9cff469364d3 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "/bin/sh -c 'apt-get…" 3 hours ago Up 3 hours (healthy) 0.0.0.0:10221->80/tcp, [::]:10221->80/tcp tei-embedding-serving +13f71e678dbd opea/vllm-rocm:latest "python3 /workspace/…" 3 hours ago Up 3 hours (healthy) 0.0.0.0:8086->8011/tcp, [::]:8086->8011/tcp vllm-service +e5a219a77c95 opea/llm-docsum:latest "bash entrypoint.sh" 3 hours ago Up 2 seconds 0.0.0.0:33218->9000/tcp, [::]:33218->9000/tcp docsum-llm-server +``` + +### 3.5 Validate agents + +FinQA Agent: + +```bash +export agent_port="9095" +prompt="What is Gap's revenue in 2024?" +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port +``` + +Research Agent: + +```bash +export agent_port="9096" +prompt="generate NVDA financial research report" +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --prompt "$prompt" --agent_role "worker" --ext_port $agent_port --tool_choice "get_current_date" --tool_choice "get_share_performance" +``` + +Supervisor Agent single turns: + +```bash +export agent_port="9090" +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --stream +``` + +Supervisor Agent multi turn: + +```bash +python3 $WORKDIR/GenAIExamples/FinanceAgent/tests/test.py --agent_role "supervisor" --ext_port $agent_port --multi-turn --stream + +``` + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following commands: + +``` +docker compose -f compose.yaml down +docker compose -f compose_vllm.yaml down +docker compose -f dataprep_compose.yaml down +``` + +All the Finance Agent containers will be stopped and then removed on completion of the "down" command. + +## Finance Agent Docker Compose Files + +In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. + +| File | Description | +| ---------------------------------------- | ------------------------------------------------------------------------------------------ | +| [compose.yaml](./compose.yaml) | Default compose to run agent service | +| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM. | +| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embeder | + + +## How to interact with the agent system with UI + +The UI microservice is launched in the previous step with the other microservices. +To see the UI, open a web browser to `http://${ip_address}:5175` to access the UI. Note the `ip_address` here is the host IP of the UI microservice. + +1. Create Admin Account with a random value + +2. Enter the endpoints in the `Connections` settings + + First, click on the user icon in the upper right corner to open `Settings`. Click on `Admin Settings`. Click on `Connections`. + + Then, enter the supervisor agent endpoint in the `OpenAI API` section: `http://${ip_address}:9090/v1`. Enter the API key as "empty". Add an arbitrary model id in `Model IDs`, for example, "opea_agent". The `ip_address` here should be the host ip of the agent microservice. + + Then, enter the dataprep endpoint in the `Icloud File API` section. You first need to enable `Icloud File API` by clicking on the button on the right to turn it into green and then enter the endpoint url, for example, `http://${ip_address}:6007/v1`. The `ip_address` here should be the host ip of the dataprep microservice. + + You should see screen like the screenshot below when the settings are done. + +![opea-agent-setting](assets/ui_connections_settings.png) + +3. Upload documents with UI + + Click on the `Workplace` icon in the top left corner. Click `Knowledge`. Click on the "+" sign to the right of `Icloud Knowledge`. You can paste an url in the left hand side of the pop-up window, or upload a local file by click on the cloud icon on the right hand side of the pop-up window. Then click on the `Upload Confirm` button. Wait till the processing is done and the pop-up window will be closed on its own when the data ingestion is done. See the screenshot below. + + Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window. + +![upload-doc-ui](assets/upload_doc_ui.png) + +4. Test agent with UI + + After the settings are done and documents are ingested, you can start to ask questions to the agent. Click on the `New Chat` icon in the top left corner, and type in your questions in the text box in the middle of the UI. + + The UI will stream the agent's response tokens. You need to expand the `Thinking` tab to see the agent's reasoning process. After the agent made tool calls, you would also see the tool output after the tool returns output to the agent. Note: it may take a while to get the tool output back if the tool execution takes time. + +![opea-agent-test](assets/opea-agent-test.png) From 6569374ffd024c4918ecc016cc3e7a264510551c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 07:48:19 +0000 Subject: [PATCH 16/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../amd/gpu/rocm/.README.md.kate-swp | Bin 485 -> 486 bytes .../docker_compose/amd/gpu/rocm/README.md | 12 +++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp index 40ee1ce33c1963ab860482fea09841cf81274183..f22ccb77c3c004851b9d182ad859421e95e641c4 100644 GIT binary patch delta 15 WcmaFL{ET^nA|oT$WF^MKj9dUI90W4} delta 13 UcmaFH{FHfvA|oU3WF^MK03gu>D*ylh diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md index 26c78bd53a..2ad2716daf 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md @@ -40,7 +40,6 @@ git checkout v1.4 Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). - ### Deploy the Services Using Docker Compose #### 3.1 Launch vllm endpoint @@ -147,12 +146,11 @@ All the Finance Agent containers will be stopped and then removed on completion In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. -| File | Description | -| ---------------------------------------- | ------------------------------------------------------------------------------------------ | -| [compose.yaml](./compose.yaml) | Default compose to run agent service | -| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM. | -| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embeder | - +| File | Description | +| ------------------------------------------------ | ------------------------------------------------------------------------------------ | +| [compose.yaml](./compose.yaml) | Default compose to run agent service | +| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM. | +| [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embedder | ## How to interact with the agent system with UI From 77a8e85cd53f3753cd0fb202df54cffb04b2136d Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Tue, 29 Apr 2025 14:57:17 +0700 Subject: [PATCH 17/24] Update README.md for AMD ROCm Signed-off-by: Artem Astafev --- .../amd/gpu/rocm/.README.md.kate-swp | Bin 485 -> 0 bytes .../docker_compose/amd/gpu/rocm/README.md | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp b/FinanceAgent/docker_compose/amd/gpu/rocm/.README.md.kate-swp deleted file mode 100644 index 40ee1ce33c1963ab860482fea09841cf81274183..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 485 zcmYk&Jqp4=5C!1b#QbS&!~+BiOBJ!Qb_KC>5$k{{ga{&{l~`JMf!xT#+H-gX=V|d0 zNOnFZJ3Bif(pl!GB6FAgkj-~xk&Oo<5$X1ykN329uQpFxeO~+F`8M(0Ml+tiSNjWn z4aK^Jej5n&F*Dl01&y8NL}RNVjr^DVemFNKfR zt#m~%aGS*MDT!(=5{ETO^pcQhH70SBh{Q!h5@!b_#?&A&MW4iwJyIpcy0tF4=f8Xb DRn9Q$ diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md index 26c78bd53a..0560d17711 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md @@ -171,7 +171,7 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U You should see screen like the screenshot below when the settings are done. -![opea-agent-setting](assets/ui_connections_settings.png) +![opea-agent-setting](../../../../assets/ui_connections_settings.png) 3. Upload documents with UI @@ -179,7 +179,7 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U Note: the data ingestion may take a few minutes depending on the length of the document. Please wait patiently and do not close the pop-up window. -![upload-doc-ui](assets/upload_doc_ui.png) +![upload-doc-ui](../../../../assets/upload_doc_ui.png) 4. Test agent with UI @@ -187,4 +187,4 @@ To see the UI, open a web browser to `http://${ip_address}:5175` to access the U The UI will stream the agent's response tokens. You need to expand the `Thinking` tab to see the agent's reasoning process. After the agent made tool calls, you would also see the tool output after the tool returns output to the agent. Note: it may take a while to get the tool output back if the tool execution takes time. -![opea-agent-test](assets/opea-agent-test.png) +![opea-agent-test](../../../../assets/opea-agent-test.png) From ec645480b87db862247c419f698b3980324e7c0b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 08:00:36 +0000 Subject: [PATCH 18/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- FinanceAgent/docker_compose/amd/gpu/rocm/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md index 816bc3cd43..277d3f02ac 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/README.md +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/README.md @@ -146,10 +146,10 @@ All the Finance Agent containers will be stopped and then removed on completion In the context of deploying a Finance Agent pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. -| File | Description | -| ------------------------------------------------ | ------------------------------------------------------------------------------------ | -| [compose.yaml](./compose.yaml) | Default compose to run agent service | -| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM. | +| File | Description | +| ------------------------------------------------ | ------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose to run agent service | +| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM Service serving framework is vLLM. | | [dataprep_compose.yaml](./dataprep_compose.yaml) | Compose file to run Data Prep service such as Redis vector DB, Re-rancer and Embedder | ## How to interact with the agent system with UI From de7c65b9d09fe8169527b363376a8e1b24df0a74 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Wed, 14 May 2025 14:03:00 +0700 Subject: [PATCH 19/24] Rename tests file for AMD ROCm Signed-off-by: Artem Astafev --- ...{test_compose_on_vllm_rocm.sh => test_compose_vllm_on_rocm.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename FinanceAgent/tests/{test_compose_on_vllm_rocm.sh => test_compose_vllm_on_rocm.sh} (100%) diff --git a/FinanceAgent/tests/test_compose_on_vllm_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh similarity index 100% rename from FinanceAgent/tests/test_compose_on_vllm_rocm.sh rename to FinanceAgent/tests/test_compose_vllm_on_rocm.sh From eafda509e1c57b26e28ca769d27bc1a9c19700b6 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 16 May 2025 11:20:18 +0700 Subject: [PATCH 20/24] Update launch_vllm.sh Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh index 5d8d58641b..9d4100ec26 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh @@ -1,7 +1,9 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" -export MAX_LEN=16384 +#export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +#export MAX_LEN=16384 +export MAX_LEN=8192 docker compose -f vllm_compose.yaml up -d From b690eeb5ce9a8edcd6470547022e0cec38a15a02 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 16 May 2025 13:00:46 +0700 Subject: [PATCH 21/24] Update launch_vllm.sh Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh index 9d4100ec26..a5c9597751 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh @@ -3,7 +3,7 @@ #export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -#export MAX_LEN=16384 -export MAX_LEN=8192 +export MAX_LEN=16384 +#export MAX_LEN=8192 docker compose -f vllm_compose.yaml up -d From 92d160b23e3102bf6c21cccecd572872d656ec37 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 16 May 2025 13:20:55 +0700 Subject: [PATCH 22/24] Adjust tests Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh | 6 +++--- FinanceAgent/tests/test_compose_vllm_on_rocm.sh | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh index a5c9597751..638660d7fb 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_vllm.sh @@ -1,9 +1,9 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" -export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +#export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export MAX_LEN=16384 -#export MAX_LEN=8192 + docker compose -f vllm_compose.yaml up -d diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh index 5b758811ec..076d464355 100644 --- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh +++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh @@ -11,6 +11,8 @@ LOG_PATH=$WORKPATH #### env vars for LLM endpoint ############# model=meta-llama/Llama-3.3-70B-Instruct +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export MAX_LEN=16384 vllm_image=opea/vllm-rocm:latest vllm_port=8086 vllm_image=$vllm_image @@ -52,7 +54,7 @@ function build_agent_image_local(){ } function start_vllm_service { - echo "start vllm gaudi service" + echo "start vllm service" docker compose -f $WORKPATH/docker_compose/amd/gpu/rocm/compose_vllm.yaml up -d sleep 1m echo "Waiting vllm rocm ready" From 9d79858e6b38a4f3704625460efca004e5260d29 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 16 May 2025 17:08:39 +0700 Subject: [PATCH 23/24] Update test_compose_vllm_on_rocm.sh Signed-off-by: Artem Astafev --- FinanceAgent/tests/test_compose_vllm_on_rocm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh index 076d464355..cfa461e2eb 100644 --- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh +++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh @@ -11,12 +11,12 @@ LOG_PATH=$WORKPATH #### env vars for LLM endpoint ############# model=meta-llama/Llama-3.3-70B-Instruct -export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" +export LLM_MODEL_ID=$model export MAX_LEN=16384 vllm_image=opea/vllm-rocm:latest -vllm_port=8086 +vllm_port=8081 vllm_image=$vllm_image -HF_CACHE_DIR=${model_cache:-"/data2/huggingface"} +HF_CACHE_DIR=${model_cache:-"./data"} vllm_volume=${HF_CACHE_DIR} ####################################### From e4ed752deb9c1e9066e7ef66faf7e1e38b158f35 Mon Sep 17 00:00:00 2001 From: Artem Astafev Date: Fri, 16 May 2025 18:07:34 +0700 Subject: [PATCH 24/24] Fix tests Signed-off-by: Artem Astafev --- FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh | 2 +- FinanceAgent/tests/test_compose_vllm_on_rocm.sh | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh index 6279d084cf..db3ec09b99 100644 --- a/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh +++ b/FinanceAgent/docker_compose/amd/gpu/rocm/launch_agents.sh @@ -14,7 +14,7 @@ export recursion_limit_supervisor=10 export vllm_port=8086 export FINANCEAGENT_VLLM_SERVICE_PORT=${vllm_port} -export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export LLM_MODEL_ID="meta-llama/Llama-3.3-70B-Instruct" export LLM_ENDPOINT_URL="http://${ip_address}:${vllm_port}" export TEMPERATURE=0.5 export MAX_TOKENS=4096 diff --git a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh index cfa461e2eb..01131449a9 100644 --- a/FinanceAgent/tests/test_compose_vllm_on_rocm.sh +++ b/FinanceAgent/tests/test_compose_vllm_on_rocm.sh @@ -14,7 +14,8 @@ model=meta-llama/Llama-3.3-70B-Instruct export LLM_MODEL_ID=$model export MAX_LEN=16384 vllm_image=opea/vllm-rocm:latest -vllm_port=8081 +vllm_port=8086 +export FINANCEAGENT_VLLM_SERVICE_PORT=$vllm_port vllm_image=$vllm_image HF_CACHE_DIR=${model_cache:-"./data"} vllm_volume=${HF_CACHE_DIR} @@ -163,8 +164,8 @@ function validate_agent_service() { echo $EXIT_CODE local EXIT_CODE="${EXIT_CODE:0-1}" if [ "$EXIT_CODE" == "1" ]; then - docker logs research-agent-endpoint - exit 1 + docker logs research-agent-endpoint + exit 1 fi # test supervisor react agent