From cf60682c8290f1191c5d3e4609a8ad3b8d1b162a Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 01/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
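+# Services defined in this file:
+#   docsum-vllm-service   - ROCm vLLM server exposing an OpenAI-compatible API (container port 8011)
+#   docsum-llm-server     - doc-summarization microservice that calls the vLLM endpoint
+#   whisper-service       - ASR service used for audio and video inputs
+#   docsum-backend-server - DocSum megaservice gateway (container port 8888)
+#   docsum-gradio-ui      - Gradio frontend (container port 5173)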
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
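+# Set HOST_IP to the host's external IP address (not localhost) and provide a valid
+# HuggingFace token below, then load these variables before running docker compose,
+# e.g. source ./set_env.sh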
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
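+    # The targets below are defined in docker_image_build/build.yaml: "vllm_rocm"
+    # builds DocSum/Dockerfile-vllm-rocm (the ROCm vLLM server image); the rest
+    # build the llm-docsum microservice, DocSum megaservice, Gradio UI and whisper images.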
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
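+    # Each check posts a sample request and greps the response for an expected
+    # substring: whisper ASR at /v1/asr, the vLLM OpenAI-compatible endpoint at
+    # /v1/chat/completions, and the llm-docsum wrapper at /v1/docsum.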
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
+ echo "===========================================" +} + +main From 1fd1de1530328321d28aa6d9db85fffeb876574c Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 02/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From bd2d47e7e53e1241c27aed0f823fa680d8ecf4e2 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 03/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
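+# GPU access: /dev/kfd and the whole /dev/dri directory are passed to the vLLM
+# container below; to restrict the container to specific GPUs, list individual
+# /dev/dri/cardN and /dev/dri/renderDN devices instead (see the README).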
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
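+# DOCSUM_VLLM_SERVICE_PORT is the host-side port mapped to the vLLM container
+# (container port 8011); the llm-docsum service reaches vLLM at
+# http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}.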
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
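+    # The names below are docker compose service keys defined in docker_image_build/build.yaml;
+    # vllm_rocm is the ROCm vLLM image builder added by this patch.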
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
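+    # Each check below POSTs a JSON payload and greps the response for an expected string.
+    # For example, the vLLM endpoint can also be probed by hand with roughly:
+    #   curl http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions \
+    #     -H 'Content-Type: application/json' \
+    #     -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'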
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
+ echo "===========================================" +} + +main From 2459ecbc53fdb7c9c449930700cff290de15c152 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 04/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From 6d5049dd1c6bb3e201c4ca807da6950e0ab4b9d2 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 05/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
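+# NOTE: --tensor-parallel-size 4 in the command below assumes at least four visible AMD GPUs;
+# specific GPUs can be mapped individually (e.g. /dev/dri/card0 and /dev/dri/renderD128)
+# instead of the whole /dev/dri/ directory.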
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
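+# NOTE: HOST_IP must be the host's externally reachable IP (not localhost); it can be
+# detected automatically, for example: export HOST_IP=$(hostname -I | awk '{print $1}')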
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
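+    # The names below are docker compose service keys from docker_image_build/build.yaml
+    # (vllm_rocm builds the ROCm vLLM image).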
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
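+    # Each check POSTs a JSON payload and greps the response for an expected string; the
+    # vLLM endpoint, for example, answers on /v1/chat/completions with an OpenAI-style payload.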
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
+ echo "===========================================" +} + +main From 9dfbdc5cffe708b084e7367d6df2910908f5e76a Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 06/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From a8857ae326b2d71ca66bc6f86715ac9ab467ac85 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 07/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/docker_image_build/build.yaml | 9 + DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 6 files changed, 574 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService
+
+   ```bash
+   curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{
+     "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false
+     }'
+   ```
+
+## šŸš€ Launch the Svelte UI
+
+Open this URL `http://{host_ip}:5173` in your browser to access the frontend.
+
+![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b)
+
+Here is an example of summarizing an article.
+
+![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55)
+
+## šŸš€ Launch the React UI (Optional)
+
+To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace the `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as shown in the config below:
+
+```yaml
+docsum-rocm-react-ui-server:
+  image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}
+  container_name: docsum-rocm-react-ui-server
+  depends_on:
+    - docsum-rocm-backend-server
+  ports:
+    - "5174:80"
+  environment:
+    - no_proxy=${no_proxy}
+    - https_proxy=${https_proxy}
+    - http_proxy=${http_proxy}
+    - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+```
+
+Open this URL `http://{host_ip}:5174` in your browser to access the frontend.
+
+![project-screenshot](../../../../assets/img/docsum-ui-react.png)
diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
new file mode 100644
index 0000000000..037aa06395
--- /dev/null
+++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
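The Mega Service request shown above uses `"stream": false`; the service also supports a streaming response, which is what the test scripts in this series look for via the final `[DONE]` marker. A sketch of the streaming variant, assuming the backend is reachable on port 8888:

```bash
# Streaming request to the DocSum Mega Service; the response arrives as a
# server-sent event stream that ends with "data: [DONE]".
curl -N "http://${host_ip}:8888/v1/docsum" \
  -H "Content-Type: application/json" \
  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models.", "max_tokens": 32, "language": "en", "stream": true}'
```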
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
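Before bringing the stack up it is worth confirming that the device nodes mapped into `docsum-vllm-service` are present and accessible. A host-side sanity check might look like the sketch below; `rocm-smi` inside the container is an assumption about the vLLM image, and group names can vary by distribution:

```bash
# Check the AMD GPU device nodes that compose.yaml passes through to the container.
ls -l /dev/kfd /dev/dri/
# The user running Docker typically needs membership in the video (and often render) group.
groups | grep -E 'video|render'
# Once the service is running, list the GPUs visible inside it (assumes rocm-smi ships in the image).
docker exec docsum-vllm-service rocm-smi
```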
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index 095fd28c93..dc0d546189 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,3 +47,12 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + vllm_rocm: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile-vllm-rocm + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
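`set_env.sh` ships with `HOST_IP` and the HuggingFace token left empty, and `DOCSUM_BACKEND_SERVICE_ENDPOINT` is derived from `HOST_IP` at source time, so both values need to be filled in and the endpoint re-exported before `docker compose up`. A possible sequence, assuming the repository root as the working directory:

```bash
# Source the defaults, then supply the values set_env.sh leaves empty.
source DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh
export HOST_IP=$(hostname -I | awk '{print $1}')            # external IP, not localhost
export DOCSUM_HUGGINGFACEHUB_API_TOKEN="${HUGGINGFACEHUB_API_TOKEN}"
# Re-derive the backend endpoint now that HOST_IP is set.
export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
```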
+ service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
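When iterating on a single component it is usually enough to rebuild only that image from `build.yaml` rather than the whole `service_list`. A sketch, run from `DocSum/docker_image_build` with GenAIComps already cloned alongside:

```bash
# Rebuild just the ROCm vLLM serving image and confirm the resulting tag.
docker compose -f build.yaml build vllm_rocm --no-cache
docker images | grep llm-vllm-rocm
```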
+ + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
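The stop/build/start/validate/prune flow above can be driven end to end from the tests directory. A sketch, assuming a ROCm-capable host with the GPU devices available to Docker and a valid HuggingFace token:

```bash
# Run the full ROCm vLLM DocSum test flow (the token value is a placeholder).
export HUGGINGFACEHUB_API_TOKEN="<your_hf_token>"
cd DocSum/tests
bash test_compose_on_rocm_vllm.sh
```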
+ echo "===========================================" +} + +main From 5a38b266ac77a2bf0766cefab14ec62f28633a8d Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 08/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/docker_image_build/build.yaml | 9 - DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 6 files changed, 574 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index dc0d546189..095fd28c93 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -47,12 +47,3 @@ services: dockerfile: comps/llms/src/doc-summarization/Dockerfile extends: docsum image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - vllm_rocm: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile-vllm-rocm - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From 9ccf540b892c0ae3a58a004afcb01d3647a92c90 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 24 Apr 2025 20:01:07 +0700 Subject: [PATCH 09/23] DocSum - refactoring README.md Signed-off-by: Chingis Yundunov --- DocSum/docker_compose/amd/gpu/rocm/README.md | 138 +++++++++++++++---- 1 file changed, 108 insertions(+), 30 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index 2c4a196149..92922f4b65 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -25,15 +25,15 @@ This section describes how to quickly deploy and test the DocSum service manuall Clone the GenAIExample repository and access the ChatQnA AMD GPU platform Docker Compose files and supporting scripts: -``` +```bash git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm ``` -Checkout a released version, such as v1.2: +Checkout a released version, such as v1.3: ``` -git checkout v1.2 +git checkout v1.3 ``` ### Generate a HuggingFace Access Token @@ -42,33 +42,96 @@ Some HuggingFace resources, such as some models, are only accessible if you have ### Configure the Deployment Environment -To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory: +To set up environment variables for deploying ChatQnA services, set up some parameters specific to the deployment environment and source the `set_env_*.sh` script in this directory: -``` -source ./set_env.sh +- if used vLLM - set_env_vllm.sh +- if used TGI - set_env.sh + +Set the values of the variables: + +- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world. + + If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address. + + If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address. + + If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located. + + We set these values in the file set_env\*\*\*\*.sh + +- **Variables with names like "**\*\*\*\*\*\*\_PORT"\*\* - These variables set the IP port numbers for establishing network connections to the application services. + The values shown in the file set_env.sh or set_env_vllm.sh they are the values used for the development and testing of the application, as well as configured for the environment in which the development is performed. These values must be configured in accordance with the rules of network access to your environment's server, and must not overlap with the IP ports of other applications that are already in use. 
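When picking port values, it can help to confirm that a candidate port is not already taken on the host before starting the stack. A minimal check with `ss` (the port number below is only an example):

```bash
# Show any listener already bound to TCP port 8888; the echo runs only if the port is free.
ss -ltn | grep ':8888 ' || echo "port 8888 is free"
```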
+
+Setting variables in the operating system environment:
+
+```bash
+export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token"
+source ./set_env_*.sh # replace the script name with the appropriate one
 ```

-The _set_env.sh_ script will prompt for required and optional environment variables used to configure the DocSum services. If a value is not entered, the script will use a default value for the same. It will also generate a _.env_ file defining the desired configuration. Consult the section on [DocSum Service configuration](#docsum-service-configuration) for information on how service specific configuration parameters affect deployments.
+Consult the section on [DocSum Service configuration](#docsum-configuration) for information on how service-specific configuration parameters affect deployments.

 ### Deploy the Services Using Docker Compose

-To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
+To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment with TGI, execute the command below. It uses the `compose.yaml` file.

 ```bash
-docker compose up -d
+cd docker_compose/amd/gpu/rocm
+# if used TGI
+docker compose -f compose.yaml up -d
+# if used vLLM
+# docker compose -f compose_vllm.yaml up -d
+```
+
+To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose files:
+
+- compose_vllm.yaml - for the vLLM-based application
+- compose.yaml - for the TGI-based application
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri:/dev/dri
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
+```
+
+This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderDN` device IDs. For example:
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri/card0:/dev/dri/card0
+  - /dev/dri/renderD128:/dev/dri/renderD128
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
 ```
+
+**How to Identify GPU Device IDs:**
+Use AMD GPU driver utilities to determine the correct `cardN` and `renderDN` IDs for your GPU; a short example is shown after the note below.

-**Note**: developers should build docker image from source when:
+> **Note**: developers should build the Docker image from source when:
+>
+> - Developing off the git main branch (as the container's ports in the repo may be different from the published Docker image).
+> - Unable to download the Docker image.
+> - Using a specific version of the Docker image.
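The guide does not prescribe a specific utility for finding these device IDs, so the following is only a sketch using standard Linux interfaces; the `/dev/dri/by-path/` symlinks map each GPU's PCI address to its `cardN`/`renderDN` nodes:

```bash
# List all DRI nodes available on the host.
ls -l /dev/dri/

# Map PCI addresses to card/renderD nodes; pick the entry whose PCI address matches
# the GPU you want to pass through (e.g. as reported by `lspci | grep -iE 'vga|display'`).
ls -l /dev/dri/by-path/
```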
Please refer to the table below to build different microservices from source: | Microservice | Deployment Guide | -| ------------ | ------------------------------------------------------------------------------------------------------------------------------------- | +|--------------| ------------------------------------------------------------------------------------------------------------------------------------- | | whisper | [whisper build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/whisper/src) | +| TGI | [TGI project](https://github.com/huggingface/text-generation-inference.git) | | vLLM | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker) | | llm-docsum | [LLM-DocSum build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/src/doc-summarization#12-build-docker-image) | | MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | @@ -84,6 +147,7 @@ docker ps -a For the default deployment, the following 5 containers should have started: +If used TGI: ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp whisper-service @@ -93,24 +157,38 @@ fds3dd5b9fd8 opea/docsum:latest "py 78964d0c1hg5 ghcr.io/huggingface/text-generation-inference:2.4.1-rocm "/tgi-entrypoint.sh" 5 minutes ago Up 5 minutes (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp docsum-tgi-service ``` +If used vLLM: +``` +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp whisper-service +4eq8b7034fd9 opea/docsum-gradio-ui:latest "docker-entrypoint.s…" 5 minutes ago Up About a minute 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp docsum-ui-server +fds3dd5b9fd8 opea/docsum:latest "python docsum.py" 5 minutes ago Up About a minute 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp docsum-backend-server +78fsd6fabfs7 opea/llm-docsum:latest "bash entrypoint.sh" 5 minutes ago Up About a minute 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp docsum-llm-server +78964d0c1hg5 opea/vllm-rocm:latest "python3 /workspace/…" 5 minutes ago Up 5 minutes (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp docsum-vllm-service +``` + ### Test the Pipeline Once the DocSum services are running, test the pipeline using the following command: ```bash -curl -X POST http://${host_ip}:8888/v1/docsum \ +curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' ``` -**Note** The value of _host_ip_ was set using the _set_env.sh_ script and can be found in the _.env_ file. +**Note** The value of _HOST_IP_ was set using the _set_env.sh_ script and can be found in the _.env_ file. ### Cleanup the Deployment To stop the containers associated with the deployment, execute the following command: -``` +```bash +# if used TGI docker compose -f compose.yaml down +# if used vLLM +# docker compose -f compose_vllm.yaml down + ``` All the DocSum containers will be stopped and then removed on completion of the "down" command. 
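To confirm that the teardown completed, you can check that none of the containers listed above remain. A minimal check, assuming the default container names used in this guide:

```bash
# Lists any remaining DocSum-related containers; only the table header is printed when cleanup succeeded.
docker ps -a --filter "name=docsum" --filter "name=whisper-service" --format "table {{.Names}}\t{{.Status}}"
```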
@@ -132,7 +210,7 @@ There are also some customized usage. ```bash # form input. Use English mode (default). -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ @@ -141,7 +219,7 @@ curl http://${host_ip}:8888/v1/docsum \ -F "stream=True" # Use Chinese mode. -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=2024幓9月26ę—„ļ¼ŒåŒ—äŗ¬ā€”ā€”ä»Šę—„ļ¼Œč‹±ē‰¹å°”ę­£å¼å‘åøƒč‹±ē‰¹å°”Ā® 至强® 6ę€§čƒ½ę øå¤„ē†å™Øļ¼ˆä»£å·Granite Rapidsļ¼‰ļ¼ŒäøŗAIć€ę•°ę®åˆ†ęžć€ē§‘å­¦č®”ē®—ē­‰č®”ē®—åÆ†é›†åž‹äøšåŠ”ęä¾›å“č¶Šę€§čƒ½ć€‚" \ @@ -150,7 +228,7 @@ curl http://${host_ip}:8888/v1/docsum \ -F "stream=True" # Upload file -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -166,11 +244,11 @@ curl http://${host_ip}:8888/v1/docsum \ Audio: ```bash -curl -X POST http://${host_ip}:8888/v1/docsum \ +curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=audio" \ -F "messages=UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA" \ @@ -182,11 +260,11 @@ curl http://${host_ip}:8888/v1/docsum \ Video: ```bash -curl -X POST http://${host_ip}:8888/v1/docsum \ +curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "video", "messages": "convert your video to base64 data type"}' -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=video" \ -F "messages=convert your video to base64 data type" \ @@ -208,7 +286,7 @@ If you want to deal with long context, can set following parameters and select s "summary_type" is set to be "auto" by default, in this mode we will check input token length, if it exceed `MAX_INPUT_TOKENS`, `summary_type` will automatically be set to `refine` mode, otherwise will be set to `stuff` mode. ```bash -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -223,7 +301,7 @@ curl http://${host_ip}:8888/v1/docsum \ In this mode LLM generate summary based on complete input text. In this case please carefully set `MAX_INPUT_TOKENS` and `MAX_TOTAL_TOKENS` according to your model and device memory, otherwise it may exceed LLM context limit and raise error when meet long context. 
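As a rough pre-flight check before using `stuff` mode, you can verify that the input budget plus the requested completion fits the total token budget. A sketch with purely illustrative values; substitute the limits and `max_tokens` you actually use:

```bash
MAX_INPUT_TOKENS=2048      # illustrative value
MAX_TOTAL_TOKENS=4096      # illustrative value
REQUESTED_MAX_TOKENS=1024  # the max_tokens you plan to request

# Worst-case headroom left in the model context; a negative result means the request
# cannot fit and you need a shorter document, a smaller max_tokens, or another summary_type.
echo $(( MAX_TOTAL_TOKENS - MAX_INPUT_TOKENS - REQUESTED_MAX_TOKENS ))
```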
```bash -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -238,7 +316,7 @@ curl http://${host_ip}:8888/v1/docsum \ Truncate mode will truncate the input text and keep only the first chunk, whose length is equal to `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` ```bash -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -255,7 +333,7 @@ Map_reduce mode will split the inputs into multiple chunks, map each document to In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - input.max_tokens - 50, MAX_INPUT_TOKENS)` ```bash -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -272,7 +350,7 @@ Refin mode will split the inputs into multiple chunks, generate summary for the In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - 2 * input.max_tokens - 128, MAX_INPUT_TOKENS)`. ```bash -curl http://${host_ip}:8888/v1/docsum \ +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: multipart/form-data" \ -F "type=text" \ -F "messages=" \ @@ -288,7 +366,7 @@ Several UI options are provided. If you need to work with multimedia documents, ### Gradio UI -To access the UI, use the URL - http://${EXTERNAL_HOST_IP}:${FAGGEN_UI_PORT} +To access the UI, use the URL - http://${HOST_IP}:${DOCSUM_FRONTEND_PORT} A page should open when you click through to this address: ![UI start page](../../../../assets/img/ui-starting-page.png) From d28db57f624cd4a5f87a1fa7f3bb7d9fa6ccb471 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 10/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 5 files changed, 565 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- 
/dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. 
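One way to set `host_ip` automatically to the machine's primary address is the same approach the project's test scripts use; double-check that the detected address is the externally reachable one:

```bash
export host_ip=$(hostname -I | awk '{print $1}')
echo "host_ip=${host_ip}"
```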
+ +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
+ local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
+ echo "===========================================" +} + +main From a644d2a81cbe837a5087fea230628b26a3085997 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 11/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 5 files changed, 565 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
- local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From fe1a26910528310586e3be842d3ccff49178e4c7 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 12/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 5 files changed, 565 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
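+        # Note: the status probe above discarded the response body (-o /dev/null),
+        # so the same request is repeated below to capture the payload for content matching.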
+ + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
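+    # This check exercises the gateway's multipart/form-data path: "type" and "messages"
+    # are posted as separate form fields via curl -F below (the JSON path is covered
+    # separately in validate_megaservice_json).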
+ local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
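+    # "echo y |" answers the interactive confirmation prompt of "docker system prune";
+    # "docker system prune -f" would be an equivalent non-interactive form.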
+ echo "===========================================" +} + +main From 450ba96d344001a143da130d28d7547b8abadec5 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 13/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 5 files changed, 565 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
- local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
-    echo "==========================================="
-}
-
-main
From f5f94b9c37a01c0814d1c4bc3ee7a9ff60fc626d Mon Sep 17 00:00:00 2001
From: Chingis Yundunov
Date: Thu, 13 Feb 2025 10:02:03 +0700
Subject: [PATCH 14/23] DocSum - add files for deploy app with ROCm vLLM

Signed-off-by: Chingis Yundunov
---
 DocSum/Dockerfile-vllm-rocm                   |  18 ++
 .../amd/gpu/rocm-vllm/README.md               | 175 ++++++++++++
 .../amd/gpu/rocm-vllm/compose.yaml            | 107 ++++++++
 .../amd/gpu/rocm-vllm/set_env.sh              |  16 ++
 DocSum/tests/test_compose_on_rocm_vllm.sh     | 249 ++++++++++++++++++
 5 files changed, 565 insertions(+)
 create mode 100644 DocSum/Dockerfile-vllm-rocm
 create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md
 create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
 create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh
 create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh

diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm
new file mode 100644
index 0000000000..f0e8a8743a
--- /dev/null
+++ b/DocSum/Dockerfile-vllm-rocm
@@ -0,0 +1,18 @@
+FROM rocm/vllm-dev:main
+
+# Set the working directory
+WORKDIR /workspace
+
+# Copy the api_server.py into the image
+ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py
+
+# Expose the port used by the API server
+EXPOSE 8011
+
+# Set environment variables
+ENV HUGGINGFACE_HUB_CACHE=/workspace
+ENV VLLM_USE_TRITON_FLASH_ATTENTION=0
+ENV PYTORCH_JIT=0
+
+# Set the entrypoint to the api_server.py script
+ENTRYPOINT ["python3", "/workspace/api_server.py"]
diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md
new file mode 100644
index 0000000000..4d41a5cd31
--- /dev/null
+++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md
@@ -0,0 +1,175 @@
+# Build and deploy DocSum Application on AMD GPU (ROCm)
+
+## šŸš€ Build Docker Images
+
+First of all, you need to build the required Docker images locally.
+
+### 1. Build LLM Image
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+docker build -t opea/llm-docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
+```
+
+After the build completes, run `docker images` to confirm the image was created.
+
+### 2. Build MegaService Docker Image
+
+To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image with the command below:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples
+cd GenAIExamples/DocSum/
+docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+### 3. Build UI Docker Image
+
+Build the frontend Docker image with the command below:
+
+```bash
+cd GenAIExamples/DocSum/ui
+docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
+```
+
+Then run the command `docker images`; you should see the following Docker images:
+
+1. `opea/llm-docsum:latest`
+2. `opea/docsum:latest`
+3. `opea/docsum-ui:latest`
+
+### 4. 
Build React UI Docker Image
+
+Build the React frontend Docker image with the command below:
+
+```bash
+cd GenAIExamples/DocSum/ui
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
+docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
+```
+
+Then run the command `docker images`; you should see the following Docker images:
+
+1. `opea/llm-docsum:latest`
+2. `opea/docsum:latest`
+3. `opea/docsum-ui:latest`
+4. `opea/docsum-react-ui:latest`
+
+## šŸš€ Start Microservices and MegaService
+
+### Required Models
+
+The default model is "Intel/neural-chat-7b-v3-3". Change "DOCSUM_LLM_MODEL_ID" in the environment variables below if you want to use another model.
+For gated models, you also need to provide a [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in the "DOCSUM_HUGGINGFACEHUB_API_TOKEN" environment variable.
+
+### Setup Environment Variables
+
+Since `compose.yaml` consumes several environment variables, set them in advance as shown below.
+
+```bash
+export HOST_IP=${host_ip}
+export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export DOCSUM_VLLM_SERVICE_PORT="8008"
+export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export DOCSUM_LLM_SERVER_PORT="9000"
+export DOCSUM_WHISPER_PORT="7066"
+export DOCSUM_MAX_INPUT_TOKENS=2048
+export DOCSUM_MAX_TOTAL_TOKENS=4096
+export DOCSUM_BACKEND_SERVER_PORT="8888"
+export DOCSUM_FRONTEND_PORT="5173"
+export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
+export ASR_SERVICE_HOST_IP=${HOST_IP}
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
+
+Note: To limit container access to a subset of GPUs, pass each device individually using one `--device /dev/dri/renderD<node>` entry per GPU, where `<node>` is the render node index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)
+
+Example of device isolation for 1 GPU:
+
+```
+      - /dev/dri/card0:/dev/dri/card0
+      - /dev/dri/renderD128:/dev/dri/renderD128
+```
+
+Example of device isolation for 2 GPUs:
+
+```
+      - /dev/dri/card0:/dev/dri/card0
+      - /dev/dri/renderD128:/dev/dri/renderD128
+      - /dev/dri/card1:/dev/dri/card1
+      - /dev/dri/renderD129:/dev/dri/renderD129
+```
+
+More information about accessing and restricting AMD GPUs is available at https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus
+
+### Start Microservice Docker Containers
+
+```bash
+cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm-vllm
+docker compose up -d
+```
+
+### Validate Microservices
+
+1. vLLM Service
+
+   ```bash
+   curl http://${host_ip}:8008/v1/chat/completions \
+     -X POST \
+     -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 64}' \
+     -H 'Content-Type: application/json'
+   ```
+
+2. LLM Microservice
+
+   ```bash
+   curl http://${host_ip}:9000/v1/docsum \
+     -X POST \
+     -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+     -H 'Content-Type: application/json'
+   ```
+
+3. MegaService
+
+   ```bash
+   curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{
+   "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false
+   }'
+   ```
+
+## šŸš€ Launch the Svelte UI
+
+Open this URL `http://{host_ip}:5173` in your browser to access the frontend.
+
+![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b)
+
+Here is an example of summarizing an article.
+
+![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55)
+
+## šŸš€ Launch the React UI (Optional)
+
+To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace the `docsum-gradio-ui` service with the `docsum-react-ui-server` service as per the config below:
+
+```yaml
+docsum-react-ui-server:
+  image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest}
+  container_name: docsum-react-ui-server
+  depends_on:
+    - docsum-backend-server
+  ports:
+    - "5174:80"
+  environment:
+    - no_proxy=${no_proxy}
+    - https_proxy=${https_proxy}
+    - http_proxy=${http_proxy}
+    - DOC_BASE_URL=${DOCSUM_BACKEND_SERVICE_ENDPOINT}
+```
+
+Open this URL `http://{host_ip}:5174` in your browser to access the frontend.
+
+![project-screenshot](../../../../assets/img/docsum-ui-react.png)
diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
new file mode 100644
index 0000000000..037aa06395
--- /dev/null
+++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
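+        # The first curl above only captured the HTTP status code (-o /dev/null -w "%{http_code}"),
+        # so the request is repeated below to capture the response body for content validation.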
+ + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
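+    # NOTE: ${host_ip} (lowercase) is never set in this script (only ip_address and HOST_IP are exported above),
+    # so the URL below likely expands to ":8888/v1/docsum". ${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT} appears to be the intent.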
+ local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
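+    # ("echo y" above answers the interactive confirmation prompt of "docker system prune"; the -f flag would skip it.)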
+ echo "===========================================" +} + +main From 78700f6c9473b0ea837789d197d94fa0da6bd2a9 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 15/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 5 files changed, 565 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
- local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From 6b756fde3862b334e84d10bac1856475a0a95cf3 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:02:03 +0700 Subject: [PATCH 16/23] DocSum - add files for deploy app with ROCm vLLM Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 ++ .../amd/gpu/rocm-vllm/README.md | 175 ++++++++++++ .../amd/gpu/rocm-vllm/compose.yaml | 107 ++++++++ .../amd/gpu/rocm-vllm/set_env.sh | 16 ++ DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ++++++++++++++++++ 5 files changed, 565 insertions(+) create mode 100644 DocSum/Dockerfile-vllm-rocm create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml create mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh create mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm new file mode 100644 index 0000000000..f0e8a8743a --- /dev/null +++ b/DocSum/Dockerfile-vllm-rocm @@ -0,0 +1,18 @@ +FROM rocm/vllm-dev:main + +# Set the working directory +WORKDIR /workspace + +# Copy the api_server.py into the image +ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py + +# Expose the port used by the API server +EXPOSE 8011 + +# Set environment variables +ENV HUGGINGFACE_HUB_CACHE=/workspace +ENV WILM_USE_TRITON_FLASH_ATTENTION=0 +ENV PYTORCH_JIT=0 + +# Set the entrypoint to the api_server.py script +ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md new file mode 100644 index 0000000000..4d41a5cd31 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md @@ -0,0 +1,175 @@ +# Build and deploy DocSum Application on AMD GPU (ROCm) + +## Build images + +## šŸš€ Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build LLM Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . +``` + +Then run the command `docker images`, you will have the following four Docker Images: + +### 2. Build MegaService Docker Image + +To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: + +```bash +git clone https://github.com/opea-project/GenAIExamples +cd GenAIExamples/DocSum/ +docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 3. Build UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` + +### 4. 
Build React UI Docker Image + +Build the frontend Docker image via below command: + +```bash +cd GenAIExamples/DocSum/ui +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . + +docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +Then run the command `docker images`, you will have the following Docker Images: + +1. `opea/llm-docsum-tgi:latest` +2. `opea/docsum:latest` +3. `opea/docsum-ui:latest` +4. `opea/docsum-react-ui:latest` + +## šŸš€ Start Microservices and MegaService + +### Required Models + +Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. +For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. + +```bash +export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${host_ip} +export DOCSUM_TGI_SERVICE_PORT="18882" +export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export DOCSUM_LLM_SERVER_PORT="8008" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +``` + +Note: Please replace with `host_ip` with your external IP address, do not use localhost. + +Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +Example for set isolation for 1 GPU + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 +``` + +Example for set isolation for 2 GPUs + +``` + - /dev/dri/card0:/dev/dri/card0 + - /dev/dri/renderD128:/dev/dri/renderD128 + - /dev/dri/card1:/dev/dri/card1 + - /dev/dri/renderD129:/dev/dri/renderD129 +``` + +Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) + +### Start Microservice Docker Containers + +```bash +cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm +docker compose up -d +``` + +### Validate Microservices + +1. TGI Service + + ```bash + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/docsum \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ + -H 'Content-Type: application/json' + ``` + +3. 
MegaService + + ```bash + curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false + }' + ``` + +## šŸš€ Launch the Svelte UI + +Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + +![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) + +Here is an example for summarizing a article. + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) + +## šŸš€ Launch the React UI (Optional) + +To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: + +```yaml +docsum-rocm-react-ui-server: + image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} + container_name: docsum-rocm-react-ui-server + depends_on: + - docsum-rocm-backend-server + ports: + - "5174:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} +``` + +Open this URL `http://{host_ip}:5175` in your browser to access the frontend. + +![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml new file mode 100644 index 0000000000..037aa06395 --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + docsum-vllm-service: + image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} + container_name: docsum-vllm-service + ports: + - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + WILM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + volumes: + - "./data:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + docsum-llm-server: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-llm-server + depends_on: + - docsum-vllm-service + ports: + - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" + ipc: host + cap_add: + - SYS_PTRACE + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" + HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} + LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} + LOGFLAG: ${DOCSUM_LOGFLAG:-False} + MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} + restart: unless-stopped + + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-service + ports: + - "${DOCSUM_WHISPER_PORT:-7066}:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + + docsum-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-backend-server + depends_on: + - docsum-tgi-service + - docsum-llm-server + ports: + - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} + ipc: host + restart: always + + docsum-gradio-ui: + image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} + container_name: docsum-ui-server + depends_on: + - docsum-backend-server + ports: + - "${DOCSUM_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh new file mode 100644 index 0000000000..43e71e0fbf --- /dev/null +++ b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP="" +export DOCSUM_MAX_INPUT_TOKENS=2048 +export DOCSUM_MAX_TOTAL_TOKENS=4096 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh new file mode 100644 index 0000000000..d0919a019a --- /dev/null +++ b/DocSum/tests/test_compose_on_rocm_vllm.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 +export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export HOST_IP=${ip_address} +export DOCSUM_VLLM_SERVICE_PORT="8008" +export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export DOCSUM_LLM_SERVER_PORT="9000" +export DOCSUM_WHISPER_PORT="7066" +export DOCSUM_BACKEND_SERVER_PORT="8888" +export DOCSUM_FRONTEND_PORT="5173" +export MEGA_SERVICE_HOST_IP=${HOST_IP} +export LLM_SERVICE_HOST_IP=${HOST_IP} +export ASR_SERVICE_HOST_IP=${HOST_IP} +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm + sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env + # Start Docker Containers + docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + + echo "===========================================" + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
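+        # The first curl above only captured the HTTP status code (-o /dev/null -w "%{http_code}"),
+        # so the request is repeated below to capture the response body for content validation.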
+ + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "CONTENT==> $CONTENT" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +get_base64_str() { + local file_name=$1 + base64 -w 0 "$file_name" +} + +# Function to generate input data for testing based on the document type +input_data_for_test() { + local document_type=$1 + case $document_type in + ("text") + echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." + ;; + ("audio") + get_base64_str "$WORKPATH/tests/data/test.wav" + ;; + ("video") + get_base64_str "$WORKPATH/tests/data/test.mp4" + ;; + (*) + echo "Invalid document type" >&2 + exit 1 + ;; + esac +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # whisper microservice + ulimit -s 65536 + validate_services \ + "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ + '{"asr_result":"well"}' \ + "whisper-service" \ + "whisper-service" \ + "{\"audio\": \"$(input_data_for_test "audio")\"}" + + # vLLM service + validate_services \ + "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ + "generated_text" \ + "docsum-vllm-service" \ + "docsum-vllm-service" \ + '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' + + # llm microservice + validate_services \ + "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ + "text" \ + "docsum-llm-server" \ + "docsum-llm-server" \ + '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + +} + +function validate_megaservice() { + local SERVICE_NAME="docsum-backend-server" + local DOCKER_NAME="docsum-backend-server" + local EXPECTED_RESULT="[DONE]" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
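+    # NOTE: ${host_ip} (lowercase) is never set in this script (only ip_address and HOST_IP are exported above),
+    # so the URL below likely expands to ":8888/v1/docsum". ${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT} appears to be the intent.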
+ local URL="${host_ip}:8888/v1/docsum" + local DATA_TYPE="type=text" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_megaservice_json() { + # Curl the Mega Service + echo "" + echo ">>> Checking text data with Content-Type: application/json" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + + echo ">>> Checking audio data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" + + echo ">>> Checking video data" + validate_services \ + "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ + "[DONE]" \ + "docsum-backend-server" \ + "docsum-backend-server" \ + "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ + docker compose stop && docker compose rm -f +} + +function main() { + echo "===========================================" + echo ">>>> Stopping any running Docker containers..." + stop_docker + + echo "===========================================" + if [[ "$IMAGE_REPO" == "opea" ]]; then + echo ">>>> Building Docker images..." + build_docker_images + fi + + echo "===========================================" + echo ">>>> Starting Docker services..." + start_services + + echo "===========================================" + echo ">>>> Validating microservices..." + validate_microservices + + echo "===========================================" + echo ">>>> Validating megaservice..." + validate_megaservice + echo ">>>> Validating validate_megaservice_json..." + validate_megaservice_json + + echo "===========================================" + echo ">>>> Stopping Docker containers..." + stop_docker + + echo "===========================================" + echo ">>>> Pruning Docker system..." + echo y | docker system prune + echo ">>>> Docker system pruned successfully." 
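+    # ("echo y" above answers the interactive confirmation prompt of "docker system prune"; the -f flag would skip it.)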
+ echo "===========================================" +} + +main From 6d6317754949d8d5de96ba5f63b515f50433b8c1 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Thu, 13 Feb 2025 10:07:05 +0700 Subject: [PATCH 17/23] DocSum - fix main Signed-off-by: Chingis Yundunov --- DocSum/Dockerfile-vllm-rocm | 18 -- .../amd/gpu/rocm-vllm/README.md | 175 ------------ .../amd/gpu/rocm-vllm/compose.yaml | 107 -------- .../amd/gpu/rocm-vllm/set_env.sh | 16 -- DocSum/tests/test_compose_on_rocm_vllm.sh | 249 ------------------ 5 files changed, 565 deletions(-) delete mode 100644 DocSum/Dockerfile-vllm-rocm delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/README.md delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml delete mode 100644 DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh delete mode 100644 DocSum/tests/test_compose_on_rocm_vllm.sh diff --git a/DocSum/Dockerfile-vllm-rocm b/DocSum/Dockerfile-vllm-rocm deleted file mode 100644 index f0e8a8743a..0000000000 --- a/DocSum/Dockerfile-vllm-rocm +++ /dev/null @@ -1,18 +0,0 @@ -FROM rocm/vllm-dev:main - -# Set the working directory -WORKDIR /workspace - -# Copy the api_server.py into the image -ADD https://raw.githubusercontent.com/vllm-project/vllm/refs/tags/v0.7.0/vllm/entrypoints/openai/api_server.py /workspace/api_server.py - -# Expose the port used by the API server -EXPOSE 8011 - -# Set environment variables -ENV HUGGINGFACE_HUB_CACHE=/workspace -ENV WILM_USE_TRITON_FLASH_ATTENTION=0 -ENV PYTORCH_JIT=0 - -# Set the entrypoint to the api_server.py script -ENTRYPOINT ["python3", "/workspace/api_server.py"] diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md b/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md deleted file mode 100644 index 4d41a5cd31..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Build and deploy DocSum Application on AMD GPU (ROCm) - -## Build images - -## šŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LLM Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile . -``` - -Then run the command `docker images`, you will have the following four Docker Images: - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `docsum.py` Python script. Build the MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/ -docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` - -### 4. 
Build React UI Docker Image - -Build the frontend Docker image via below command: - -```bash -cd GenAIExamples/DocSum/ui -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT -f ./docker/Dockerfile.react . - -docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/llm-docsum-tgi:latest` -2. `opea/docsum:latest` -3. `opea/docsum-ui:latest` -4. `opea/docsum-react-ui:latest` - -## šŸš€ Start Microservices and MegaService - -### Required Models - -Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in environment variables below if you want to use another model. -For gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) in "HUGGINGFACEHUB_API_TOKEN" environment variable. - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm" -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${host_ip} -export DOCSUM_TGI_SERVICE_PORT="18882" -export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -export DOCSUM_LLM_SERVER_PORT="8008" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" -``` - -Note: Please replace with `host_ip` with your external IP address, do not use localhost. - -Note: In order to limit access to a subset of GPUs, please pass each device individually using one or more -device /dev/dri/rendered, where is the card index, starting from 128. (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -Example for set isolation for 1 GPU - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 -``` - -Example for set isolation for 2 GPUs - -``` - - /dev/dri/card0:/dev/dri/card0 - - /dev/dri/renderD128:/dev/dri/renderD128 - - /dev/dri/card1:/dev/dri/card1 - - /dev/dri/renderD129:/dev/dri/renderD129 -``` - -Please find more information about accessing and restricting AMD GPUs in the link (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus) - -### Start Microservice Docker Containers - -```bash -cd GenAIExamples/DocSum/docker_compose/amd/gpu/rocm -docker compose up -d -``` - -### Validate Microservices - -1. TGI Service - - ```bash - curl http://${host_ip}:8008/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ - -H 'Content-Type: application/json' - ``` - -2. LLM Microservice - - ```bash - curl http://${host_ip}:9000/v1/docsum \ - -X POST \ - -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ - -H 'Content-Type: application/json' - ``` - -3. 
MegaService - - ```bash - curl http://${host_ip}:8888/v1/docsum -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":32, "language":"en", "stream":false - }' - ``` - -## šŸš€ Launch the Svelte UI - -Open this URL `http://{host_ip}:5173` in your browser to access the frontend. - -![project-screenshot](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/93b1ed4b-4b76-4875-927e-cc7818b4825b) - -Here is an example for summarizing a article. - -![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/67ecb2ec-408d-4e81-b124-6ded6b833f55) - -## šŸš€ Launch the React UI (Optional) - -To access the React-based frontend, modify the UI service in the `compose.yaml` file. Replace `docsum-rocm-ui-server` service with the `docsum-rocm-react-ui-server` service as per the config below: - -```yaml -docsum-rocm-react-ui-server: - image: ${REGISTRY:-opea}/docsum-react-ui:${TAG:-latest} - container_name: docsum-rocm-react-ui-server - depends_on: - - docsum-rocm-backend-server - ports: - - "5174:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} -``` - -Open this URL `http://{host_ip}:5175` in your browser to access the frontend. - -![project-screenshot](../../../../assets/img/docsum-ui-react.png) diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml deleted file mode 100644 index 037aa06395..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/compose.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -services: - docsum-vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-rocm:${TAG:-latest} - container_name: docsum-vllm-service - ports: - - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - WILM_USE_TRITON_FLASH_ATTENTION: 0 - PYTORCH_JIT: 0 - volumes: - - "./data:/data" - shm_size: 20G - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - - apparmor=unconfined - command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" - ipc: host - - docsum-llm-server: - image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} - container_name: docsum-llm-server - depends_on: - - docsum-vllm-service - ports: - - "${DOCSUM_LLM_SERVER_PORT:-9000}:9000" - ipc: host - cap_add: - - SYS_PTRACE - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN} - LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID} - LOGFLAG: ${DOCSUM_LOGFLAG:-False} - MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS} - MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS} - restart: unless-stopped - - whisper-service: - image: ${REGISTRY:-opea}/whisper:${TAG:-latest} - container_name: whisper-service - ports: - - "${DOCSUM_WHISPER_PORT:-7066}:7066" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - restart: unless-stopped - - docsum-backend-server: - image: ${REGISTRY:-opea}/docsum:${TAG:-latest} - container_name: docsum-backend-server - depends_on: - - docsum-tgi-service - - docsum-llm-server - ports: - - "${DOCSUM_BACKEND_SERVER_PORT:-8888}:8888" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - MEGA_SERVICE_HOST_IP: ${HOST_IP} - LLM_SERVICE_HOST_IP: ${HOST_IP} - ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP} - ipc: host - restart: always - - docsum-gradio-ui: - image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest} - container_name: docsum-ui-server - depends_on: - - docsum-backend-server - ports: - - "${DOCSUM_FRONTEND_PORT:-5173}:5173" - environment: - no_proxy: ${no_proxy} - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - BACKEND_SERVICE_ENDPOINT: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - DOC_BASE_URL: ${DOCSUM_BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh b/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh deleted file mode 100644 index 43e71e0fbf..0000000000 --- a/DocSum/docker_compose/amd/gpu/rocm-vllm/set_env.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc. 
-# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP="" -export DOCSUM_MAX_INPUT_TOKENS=2048 -export DOCSUM_MAX_TOTAL_TOKENS=4096 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN="" -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export DOCSUM_BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" diff --git a/DocSum/tests/test_compose_on_rocm_vllm.sh b/DocSum/tests/test_compose_on_rocm_vllm.sh deleted file mode 100644 index d0919a019a..0000000000 --- a/DocSum/tests/test_compose_on_rocm_vllm.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -xe -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -export MAX_INPUT_TOKENS=1024 -export MAX_TOTAL_TOKENS=2048 -export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HOST_IP=${ip_address} -export DOCSUM_VLLM_SERVICE_PORT="8008" -export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export DOCSUM_LLM_SERVER_PORT="9000" -export DOCSUM_WHISPER_PORT="7066" -export DOCSUM_BACKEND_SERVER_PORT="8888" -export DOCSUM_FRONTEND_PORT="5173" -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LLM_SERVICE_HOST_IP=${HOST_IP} -export ASR_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="vllm_rocm llm-docsum docsum docsum-gradio-ui whisper" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker images && sleep 1s -} - -function start_services() { - cd "$WORKPATH"/docker_compose/amd/gpu/rocm-vllm - sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env - # Start Docker Containers - docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log - sleep 1m -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - - echo "===========================================" - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
- - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "CONTENT==> $CONTENT" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -get_base64_str() { - local file_name=$1 - base64 -w 0 "$file_name" -} - -# Function to generate input data for testing based on the document type -input_data_for_test() { - local document_type=$1 - case $document_type in - ("text") - echo "THIS IS A TEST >>>> and a number of states are starting to adopt them voluntarily special correspondent john delenco of education week reports it takes just 10 minutes to cross through gillette wyoming this small city sits in the northeast corner of the state surrounded by 100s of miles of prairie but schools here in campbell county are on the edge of something big the next generation science standards you are going to build a strand of dna and you are going to decode it and figure out what that dna actually says for christy mathis at sage valley junior high school the new standards are about learning to think like a scientist there is a lot of really good stuff in them every standard is a performance task it is not you know the child needs to memorize these things it is the student needs to be able to do some pretty intense stuff we are analyzing we are critiquing we are." - ;; - ("audio") - get_base64_str "$WORKPATH/tests/data/test.wav" - ;; - ("video") - get_base64_str "$WORKPATH/tests/data/test.mp4" - ;; - (*) - echo "Invalid document type" >&2 - exit 1 - ;; - esac -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # whisper microservice - ulimit -s 65536 - validate_services \ - "${HOST_IP}:${DOCSUM_WHISPER_PORT}/v1/asr" \ - '{"asr_result":"well"}' \ - "whisper-service" \ - "whisper-service" \ - "{\"audio\": \"$(input_data_for_test "audio")\"}" - - # vLLM service - validate_services \ - "${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}/v1/chat/completions" \ - "generated_text" \ - "docsum-vllm-service" \ - "docsum-vllm-service" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' - - # llm microservice - validate_services \ - "${HOST_IP}:${DOCSUM_LLM_SERVER_PORT}/v1/docsum" \ - "text" \ - "docsum-llm-server" \ - "docsum-llm-server" \ - '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - -} - -function validate_megaservice() { - local SERVICE_NAME="docsum-backend-server" - local DOCKER_NAME="docsum-backend-server" - local EXPECTED_RESULT="[DONE]" - local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
- local URL="${host_ip}:8888/v1/docsum" - local DATA_TYPE="type=text" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -F "$DATA_TYPE" -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_megaservice_json() { - # Curl the Mega Service - echo "" - echo ">>> Checking text data with Content-Type: application/json" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - '{"type": "text", "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' - - echo ">>> Checking audio data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"audio\", \"messages\": \"$(input_data_for_test "audio")\"}" - - echo ">>> Checking video data" - validate_services \ - "${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum" \ - "[DONE]" \ - "docsum-backend-server" \ - "docsum-backend-server" \ - "{\"type\": \"video\", \"messages\": \"$(input_data_for_test "video")\"}" - -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm-vllm/ - docker compose stop && docker compose rm -f -} - -function main() { - echo "===========================================" - echo ">>>> Stopping any running Docker containers..." - stop_docker - - echo "===========================================" - if [[ "$IMAGE_REPO" == "opea" ]]; then - echo ">>>> Building Docker images..." - build_docker_images - fi - - echo "===========================================" - echo ">>>> Starting Docker services..." - start_services - - echo "===========================================" - echo ">>>> Validating microservices..." - validate_microservices - - echo "===========================================" - echo ">>>> Validating megaservice..." - validate_megaservice - echo ">>>> Validating validate_megaservice_json..." - validate_megaservice_json - - echo "===========================================" - echo ">>>> Stopping Docker containers..." - stop_docker - - echo "===========================================" - echo ">>>> Pruning Docker system..." - echo y | docker system prune - echo ">>>> Docker system pruned successfully." 
- echo "===========================================" -} - -main From 316d919772e913c05496cbfd127b618c1e72d1c0 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Fri, 4 Jul 2025 13:10:28 +0700 Subject: [PATCH 18/23] test_docker_runner Signed-off-by: Chingis Yundunov --- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 + DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index 563f1468f1..dfae1c5b5f 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -108,3 +108,4 @@ services: networks: default: driver: bridge + diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index d8a678f695..2b4e6f0be3 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -108,3 +108,4 @@ services: networks: default: driver: bridge + From b0407c0c16e96c28115ba99e554ebf5bdd97281e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Jul 2025 06:11:22 +0000 Subject: [PATCH 19/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- DocSum/docker_compose/amd/gpu/rocm/README.md | 4 +++- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 - DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index 92922f4b65..56c0fc022c 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -129,7 +129,7 @@ Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs Please refer to the table below to build different microservices from source: | Microservice | Deployment Guide | -|--------------| ------------------------------------------------------------------------------------------------------------------------------------- | +| ------------ | ------------------------------------------------------------------------------------------------------------------------------------- | | whisper | [whisper build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/whisper/src) | | TGI | [TGI project](https://github.com/huggingface/text-generation-inference.git) | | vLLM | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker) | @@ -148,6 +148,7 @@ docker ps -a For the default deployment, the following 5 containers should have started: If used TGI: + ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp whisper-service @@ -158,6 +159,7 @@ fds3dd5b9fd8 opea/docsum:latest "py ``` If used vLLM: + ``` CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 748f577b3c78 opea/whisper:latest "python whisper_s…" 5 minutes ago Up About a minute 0.0.0.0:7066->7066/tcp, :::7066->7066/tcp whisper-service diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index dfae1c5b5f..563f1468f1 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ 
b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -108,4 +108,3 @@ services: networks: default: driver: bridge - diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 2b4e6f0be3..d8a678f695 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -108,4 +108,3 @@ services: networks: default: driver: bridge - From 44e6c81a3be3ccfdcee874f1e930eed657b88900 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Fri, 4 Jul 2025 13:16:39 +0700 Subject: [PATCH 20/23] test_docker_runner Signed-off-by: Chingis Yundunov --- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 - DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index dfae1c5b5f..563f1468f1 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -108,4 +108,3 @@ services: networks: default: driver: bridge - diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 2b4e6f0be3..d8a678f695 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -108,4 +108,3 @@ services: networks: default: driver: bridge - From d6ad48b7777eda444c57123c5d64b97848828cb8 Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Fri, 4 Jul 2025 13:19:01 +0700 Subject: [PATCH 21/23] test_docker_runner Signed-off-by: Chingis Yundunov --- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 + DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index 563f1468f1..dfae1c5b5f 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -108,3 +108,4 @@ services: networks: default: driver: bridge + diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index d8a678f695..2b4e6f0be3 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -108,3 +108,4 @@ services: networks: default: driver: bridge + From 533b1fd860dc22598666869f4607b79719c1ba89 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Jul 2025 06:19:35 +0000 Subject: [PATCH 22/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 - DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index dfae1c5b5f..563f1468f1 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -108,4 +108,3 @@ services: networks: default: driver: bridge - diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index 2b4e6f0be3..d8a678f695 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ 
-108,4 +108,3 @@ services: networks: default: driver: bridge - From e0553ef3f37244f68174073deaf3ad02916fbdab Mon Sep 17 00:00:00 2001 From: Chingis Yundunov Date: Fri, 4 Jul 2025 13:24:11 +0700 Subject: [PATCH 23/23] test_docker_runner Signed-off-by: Chingis Yundunov --- DocSum/docker_compose/amd/gpu/rocm/compose.yaml | 1 + DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml index 563f1468f1..62a588292d 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml @@ -8,6 +8,7 @@ services: ports: - "${DOCSUM_TGI_SERVICE_PORT:-8008}:80" environment: + test_env: test no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} diff --git a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml index d8a678f695..4f3aa9308d 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml +++ b/DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -8,6 +8,7 @@ services: ports: - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011" environment: + test_env: test no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy}
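
If you want to sanity-check that the `test_env` value added in this last patch is actually injected at runtime, one possible check is sketched below; the container name `docsum-vllm-service` is an assumption carried over from the compose files earlier in this series, not something this hunk defines.

```bash
# Bring up the vLLM variant and confirm the variable inside the container.
# The container name is an assumption based on earlier compose files in this series.
cd DocSum/docker_compose/amd/gpu/rocm
docker compose -f compose_vllm.yaml up -d
docker exec docsum-vllm-service printenv test_env   # should print: test
```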