diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 788df19..0000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.DS_Store -infer_output/* -models/* -anygpt/seed/* -.gitignore \ No newline at end of file diff --git a/Docker/Dockerfile b/Docker/Dockerfile new file mode 100644 index 0000000..d9b5ddc --- /dev/null +++ b/Docker/Dockerfile @@ -0,0 +1,39 @@ +FROM nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y --no-install-recommends \ + git \ + gcc \ + curl \ + wget \ + sudo \ + pciutils \ + python3-all-dev \ + python-is-python3 \ + python3-pip \ + ffmpeg \ + libsdl2-dev \ + pulseaudio \ + alsa-utils \ + portaudio19-dev \ + && pip install pip -U + +WORKDIR /app + +RUN git clone https://github.com/Sunwood-ai-labs/AnyGPT-JP.git . + +RUN pip install -r requirements.txt + +RUN mkdir -p models/anygpt \ + && mkdir -p models/seed-tokenizer-2 \ + && mkdir -p models/speechtokenizer \ + && mkdir -p models/soundstorm + +RUN pip install bigmodelvis \ + && pip install peft \ + && pip install --upgrade huggingface_hub + +# COPY scripts/download_models.py /app/scripts/download_models.py +# RUN python scripts/download_models.py diff --git a/Docker/README.JP.md b/Docker/README.JP.md new file mode 100644 index 0000000..7a7c82e --- /dev/null +++ b/Docker/README.JP.md @@ -0,0 +1,43 @@ +# AnyGPTのDockerでの実行方法 + +このREADMEでは、AnyGPTをDockerを用いて実行する方法を説明します。 + +## 前提条件 + +- Dockerがインストール済みであること +- GPU環境で実行する場合は、NVIDIA Container Toolkitがインストール済みであること + +## 手順 + + +1. 以下のコマンドを実行して、Dockerイメージをビルドします。 + ```bash + docker-compose up --build + ``` + +2. モデルをダウンロードします。 + ```bash + docker-compose run anygpt python /app/scripts/download_models.py + ``` + +3. 
推論を実行します。 + ```bash + docker-compose run anygpt python anygpt/src/infer/cli_infer_base_model.py \ + --model-name-or-path models/anygpt/base \ + --image-tokenizer-path models/seed-tokenizer-2/seed_quantizer.pt \ + --speech-tokenizer-path models/speechtokenizer/ckpt.dev \ + --speech-tokenizer-config models/speechtokenizer/config.json \ + --soundstorm-path models/soundstorm/speechtokenizer_soundstorm_mls.pt \ + --output-dir "infer_output/base" + ``` + +4. 推論結果は `docker/infer_output/base` ディレクトリに出力されます。 + +## トラブルシューティング + +- モデルのダウンロードに失敗する場合は、`download_models.py`スクリプトを確認し、必要に応じてURLを更新してください。 +- 推論の実行に失敗する場合は、コマンドの引数を確認し、モデルのパスが正しいことを確認してください。 + +## 注意事項 + +- モデルのダウンロードと推論の実行には、大量のメモリとディスク容量が必要です。十分なリソースを確保してください。 \ No newline at end of file diff --git a/Docker/README.md b/Docker/README.md new file mode 100644 index 0000000..9cef503 --- /dev/null +++ b/Docker/README.md @@ -0,0 +1,42 @@ +# Running AnyGPT with Docker + +This README explains how to run AnyGPT using Docker. + +## Prerequisites + +- Docker is installed +- NVIDIA Container Toolkit is installed if running in a GPU environment + +## Steps + +1. Build the Docker image by running the following command: + ```bash + docker-compose up --build + ``` + +2. Download the models: + ```bash + docker-compose run anygpt python /app/scripts/download_models.py + ``` + +3. Run the inference: + ```bash + docker-compose run anygpt python anygpt/src/infer/cli_infer_base_model.py \ + --model-name-or-path models/anygpt/base \ + --image-tokenizer-path models/seed-tokenizer-2/seed_quantizer.pt \ + --speech-tokenizer-path models/speechtokenizer/ckpt.dev \ + --speech-tokenizer-config models/speechtokenizer/config.json \ + --soundstorm-path models/soundstorm/speechtokenizer_soundstorm_mls.pt \ + --output-dir "infer_output/base" + ``` + +4. The inference results will be output to the `docker/infer_output/base` directory. 
+ +## Troubleshooting + +- If the model download fails, check the `download_models.py` script and update the URLs if necessary. +- If the inference execution fails, check the command arguments and ensure that the model paths are correct. + +## Notes + +- Downloading the models and running the inference requires a large amount of memory and disk space. Ensure that sufficient resources are available. \ No newline at end of file diff --git a/anygpt/src/__pycache__/__init__.cpython-39.pyc b/anygpt/src/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 33fb026..0000000 Binary files a/anygpt/src/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/infer/__pycache__/__init__.cpython-39.pyc b/anygpt/src/infer/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 415718a..0000000 Binary files a/anygpt/src/infer/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/infer/__pycache__/pre_post_process.cpython-39.pyc b/anygpt/src/infer/__pycache__/pre_post_process.cpython-39.pyc deleted file mode 100644 index c8fe730..0000000 Binary files a/anygpt/src/infer/__pycache__/pre_post_process.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/infer/__pycache__/voice_clone.cpython-39.pyc b/anygpt/src/infer/__pycache__/voice_clone.cpython-39.pyc deleted file mode 100644 index af4f1a4..0000000 Binary files a/anygpt/src/infer/__pycache__/voice_clone.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/infer/pre_post_process.py b/anygpt/src/infer/pre_post_process.py index d1d299e..32fed81 100644 --- a/anygpt/src/infer/pre_post_process.py +++ b/anygpt/src/infer/pre_post_process.py @@ -4,7 +4,7 @@ sys.path.append("/mnt/petrelfs/zhanjun.p/mllm") sys.path.append("/mnt/petrelfs/zhanjun.p/src") from transformers import GenerationConfig -from mmgpt.src.m_utils.prompter_mmgpt import Prompter +from anygpt.src.m_utils.prompter import Prompter from tqdm import tqdm from m_utils.conversation import 
get_conv_template diff --git a/anygpt/src/m_utils/__pycache__/__init__.cpython-311.pyc b/anygpt/src/m_utils/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 72023df..0000000 Binary files a/anygpt/src/m_utils/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/__init__.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index d7acf29..0000000 Binary files a/anygpt/src/m_utils/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/anything2token.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/anything2token.cpython-39.pyc deleted file mode 100644 index ac5e34f..0000000 Binary files a/anygpt/src/m_utils/__pycache__/anything2token.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/conversation.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/conversation.cpython-39.pyc deleted file mode 100644 index 2031e38..0000000 Binary files a/anygpt/src/m_utils/__pycache__/conversation.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/instructions.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/instructions.cpython-39.pyc deleted file mode 100644 index cf31feb..0000000 Binary files a/anygpt/src/m_utils/__pycache__/instructions.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/other2text_instructions.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/other2text_instructions.cpython-39.pyc deleted file mode 100644 index 6b3ee8c..0000000 Binary files a/anygpt/src/m_utils/__pycache__/other2text_instructions.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/output.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/output.cpython-39.pyc deleted file mode 100644 index 3941908..0000000 Binary files a/anygpt/src/m_utils/__pycache__/output.cpython-39.pyc and /dev/null differ diff --git 
a/anygpt/src/m_utils/__pycache__/prompter.cpython-311.pyc b/anygpt/src/m_utils/__pycache__/prompter.cpython-311.pyc deleted file mode 100644 index d453b10..0000000 Binary files a/anygpt/src/m_utils/__pycache__/prompter.cpython-311.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/prompter.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/prompter.cpython-39.pyc deleted file mode 100644 index 7c9efbc..0000000 Binary files a/anygpt/src/m_utils/__pycache__/prompter.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/prompter_mmgpt.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/prompter_mmgpt.cpython-39.pyc deleted file mode 100644 index d1b523b..0000000 Binary files a/anygpt/src/m_utils/__pycache__/prompter_mmgpt.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/prompter_old.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/prompter_old.cpython-39.pyc deleted file mode 100644 index d0f19fa..0000000 Binary files a/anygpt/src/m_utils/__pycache__/prompter_old.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/read_modality.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/read_modality.cpython-39.pyc deleted file mode 100644 index e46cbcf..0000000 Binary files a/anygpt/src/m_utils/__pycache__/read_modality.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/templates.cpython-311.pyc b/anygpt/src/m_utils/__pycache__/templates.cpython-311.pyc deleted file mode 100644 index 5b2317a..0000000 Binary files a/anygpt/src/m_utils/__pycache__/templates.cpython-311.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/templates.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/templates.cpython-39.pyc deleted file mode 100644 index 396e790..0000000 Binary files a/anygpt/src/m_utils/__pycache__/templates.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/templates_old.cpython-39.pyc 
b/anygpt/src/m_utils/__pycache__/templates_old.cpython-39.pyc deleted file mode 100644 index dbb49f5..0000000 Binary files a/anygpt/src/m_utils/__pycache__/templates_old.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/text2other_instructions.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/text2other_instructions.cpython-39.pyc deleted file mode 100644 index f5228d0..0000000 Binary files a/anygpt/src/m_utils/__pycache__/text2other_instructions.cpython-39.pyc and /dev/null differ diff --git a/anygpt/src/m_utils/__pycache__/transforms.cpython-39.pyc b/anygpt/src/m_utils/__pycache__/transforms.cpython-39.pyc deleted file mode 100644 index 06ed9ea..0000000 Binary files a/anygpt/src/m_utils/__pycache__/transforms.cpython-39.pyc and /dev/null differ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..39c6464 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,21 @@ +version: '3' +services: + anygpt: + image: anygpt + build: + context: ./Docker + dockerfile: Dockerfile + volumes: + - ./:/app + - ./.cache:/root/.cache + + env_file: + - .env + tty: true + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [ gpu ] \ No newline at end of file diff --git a/scripts/cli_infer_base_model2.sh b/scripts/cli_infer_base_model2.sh new file mode 100644 index 0000000..9df4b0d --- /dev/null +++ b/scripts/cli_infer_base_model2.sh @@ -0,0 +1,82 @@ +out_dir="infer_output/base" +mkdir -p ${out_dir} + +python anygpt/src/infer/cli_infer_base_model.py \ + --model-name-or-path models/anygpt/base \ + --image-tokenizer-path models/seed-tokenizer-2/seed_quantizer.pt \ + --speech-tokenizer-path models/speechtokenizer/ckpt.dev \ + --speech-tokenizer-config models/speechtokenizer/config.json \ + --soundstorm-path models/soundstorm/speechtokenizer_soundstorm_mls.pt \ + --output-dir ${out_dir} + + + +# image|text|data/images/testset/aYQ2uNa.jpg +# 
image|text|data/images/testset/image-20231121155007517.png +# image|text|data/images/testset/gpt4 test images/4.png + + +# text|image|a happy dog running on the grass +# text|image|A group of students leaving the school +# text|image|a happy boy playing with his dog +# text|image|a sunset behind a mountain range +# text|image|a beautiful lake, surrounded by mountains +# text|image|a kitten curled up on the ground with its eyes closed behind a tree +# text|image|An animated version of Iron Man +# text|image|A Superman in flight. + + +# speech|text|data/speech/testset2.jsonl + +# text|speech|to be or not to be, this is a question +# text|speech|The primary colors are red, blue, and yellow. These colors are the building blocks of all other colors and are used to create the full spectrum of colors. +# text|speech|Going to the moon is a challenging task that requires a lot of planning and resources. To do this, you will need to develop a spacecraft that can withstand the extreme conditions of the moon's atmosphere +# text|speech|Going to the moon is a challenging task that requires a lot of planning and resources. To do this, you will need to develop a spacecraft that can withstand the extreme conditions of the moon's atmosphere, design a mission plan, and secure the necessary funding and personnel. Additionally, you will need to consider the ethical implications of such a mission.|data/speech/prompt/prompt3.wav +# text|speech|Yes, I do know Stephen Curry.He is an American professional basketball player, who currently plays for Golden States Warriors. He is two-time NBA most valuable player and four-time NBA all star.|data/speech/prompt/prompt3.wav +# text|speech|hello world, hello everyone +# text|speech|hello world +# text|speech|The capital of France is Paris. It is located in the northern part of the country, along the Seine River. 
+# text|speech|hello world, hello everyone|/mnt/petrelfs/zhanjun.p/mllm/data/speech/prompt/prompt (1).wav +# text|speech|Yes, I do know Stephen Curry.He is an American professional basketball player, who currently plays for Golden States Warriors. He is two-time NBA most valuable player and four-time NBA all star.|/mnt/petrelfs/zhanjun.p/mllm/data/speech/testset/mls-test-1.wav + +# text|speech|Going to the moon is a challenging task that requires a lot of planning and resources. To do this, you will need to develop a spacecraft that can withstand the extreme conditions of the moon's atmosphere|/mnt/petrelfs/zhanjun.p/mllm/data/speech/prompt/prompt3.wav +# text|speech|The primary colors are red, blue, and yellow. These colors are the building blocks of all other colors and are used to create the full spectrum of colors.|/mnt/petrelfs/zhanjun.p/mllm/data/speech/prompt/LJ049-0185_24K.wav +# text|speech|The capital of France is Paris. It is located in the northern part of the country, along the Seine River.|/mnt/petrelfs/zhanjun.p/mllm/data/speech/testset/vctk-1.wav +# text|speech|hey guys, i am moss|/mnt/petrelfs/zhanjun.p/mllm/data/speech/prompt/moss-1.wav +# text|speech|hey guys, i am moss. i am an artificial intelligence made by fudan university|/mnt/petrelfs/zhanjun.p/mllm/data/speech/prompt/prompt1.wav +# text|speech|The primary colors are red, blue, and yellow. These colors are the building blocks of all other colors and are used to create the full spectrum of colors.|data/speech/test_case/2.wav + +# text|audio|a bird is chirping. +# text|audio|A passionate drum set. +# text|audio|A dog is barking. +# text|audio|A man walking alone on a muddy road. +# text|audio|The roar of a tiger. +# text|audio|A passionate drum set. +# text|audio|The waves crashed against the beach. +# text|audio|A gunshot is being fired. 
+ +# audio|text|/mnt/petrelfs/zhanjun.p/mllm/data/audio/沉重的咕噜声..._耳聆网_[声音ID:10492].mp3 +# audio|text|/mnt/petrelfs/zhanjun.p/mllm/data/audio/狮子咆哮_耳聆网_[声音ID:11539].wav +# audio|text|/mnt/petrelfs/zhanjun.p/mllm/infer_output/audio_pretrain_4n_2ga_true/checkpoint-37000/a bird is chirping1203_160539.wav +# audio|text|/mnt/petrelfs/zhanjun.p/mllm/infer_output/audio_pretrain_4n_2ga_true/checkpoint-37000/A dog is barking.1203_155916.wav + +# text|music|A passionate drum set. +# text|music|a lilting piano melody. +# text|music|Music with a slow and grand rhythm. +# text|music|features an indie rock sound with distinct elements that evoke a dreamy, soothing atmosphere +# text|music|Slow tempo, bass-and-drums-led reggae song. Sustained electric guitar. High-pitched bongos with ringing tones. Vocals are relaxed with a laid-back feel, very expressive. + +# sh scripts/infer_cli.sh visual_inter_speech_golden_fs/checkpoint-31000 +# sh scripts/infer_cli.sh visual_inter/checkpoint-14000 +# sh scripts/infer_cli.sh visual_inter_true/checkpoint-8000 +# sh scripts/infer_cli.sh visual_mix_template/checkpoint-5000 +# sh scripts/infer_cli.sh speech_pretrain/checkpoint-14000 +# sh scripts/infer_cli.sh visual_cc_sbu/checkpoint-4000 +# sh scripts/infer_cli.sh visual_laion_no_group/checkpoint-23000 +# sh scripts/infer_cli.sh visual_group_4nodes/checkpoint-51000 +# sh scripts/infer_cli.sh music_pretrain_4n_4ga/checkpoint-10000 +# sh scripts/infer_cli.sh audio_pretrain_4n_2ga/checkpoint-11000 + +# sh scripts/infer_cli.sh music_pretrain_20s_8n_2ga/checkpoint-58000 +# sh scripts/infer_cli.sh audio_pretrain_4n_2ga_true/checkpoint-37000 +# sh scripts/infer_cli.sh audio_pretrain_4n_2ga_true/checkpoint-50000 \ No newline at end of file diff --git a/scripts/download_models.py b/scripts/download_models.py new file mode 100644 index 0000000..cb18d16 --- /dev/null +++ b/scripts/download_models.py @@ -0,0 +1,9 @@ +from huggingface_hub import snapshot_download + +def download_models(): + 
snapshot_download(repo_id='fnlp/AnyGPT-base', local_dir='models/anygpt/base') + snapshot_download(repo_id='AILab-CVC/seed-tokenizer-2', local_dir='models/seed-tokenizer-2') + snapshot_download(repo_id='fnlp/AnyGPT-speech-modules', local_dir='models') + +if __name__ == '__main__': + download_models() \ No newline at end of file diff --git a/seed2/__pycache__/__init__.cpython-311.pyc b/seed2/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 6f2066d..0000000 Binary files a/seed2/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/seed2/__pycache__/__init__.cpython-39.pyc b/seed2/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 2ea1c80..0000000 Binary files a/seed2/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/seed2/__pycache__/llama_xformer.cpython-39.pyc b/seed2/__pycache__/llama_xformer.cpython-39.pyc deleted file mode 100644 index ecab766..0000000 Binary files a/seed2/__pycache__/llama_xformer.cpython-39.pyc and /dev/null differ diff --git a/seed2/__pycache__/model_tools.cpython-39.pyc b/seed2/__pycache__/model_tools.cpython-39.pyc deleted file mode 100644 index 5b446d7..0000000 Binary files a/seed2/__pycache__/model_tools.cpython-39.pyc and /dev/null differ diff --git a/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-311.pyc b/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-311.pyc deleted file mode 100644 index 88d42f2..0000000 Binary files a/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-311.pyc and /dev/null differ diff --git a/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-39.pyc b/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-39.pyc deleted file mode 100644 index a7714e8..0000000 Binary files a/seed2/__pycache__/pipeline_stable_unclip_img2img.cpython-39.pyc and /dev/null differ diff --git a/seed2/__pycache__/seed_llama_tokenizer.cpython-311.pyc b/seed2/__pycache__/seed_llama_tokenizer.cpython-311.pyc deleted file mode 100644 index 
ede0dfe..0000000 Binary files a/seed2/__pycache__/seed_llama_tokenizer.cpython-311.pyc and /dev/null differ diff --git a/seed2/__pycache__/seed_llama_tokenizer.cpython-39.pyc b/seed2/__pycache__/seed_llama_tokenizer.cpython-39.pyc deleted file mode 100644 index 8c84d9e..0000000 Binary files a/seed2/__pycache__/seed_llama_tokenizer.cpython-39.pyc and /dev/null differ diff --git a/seed2/__pycache__/transforms.cpython-311.pyc b/seed2/__pycache__/transforms.cpython-311.pyc deleted file mode 100644 index 9e57ba7..0000000 Binary files a/seed2/__pycache__/transforms.cpython-311.pyc and /dev/null differ diff --git a/seed2/__pycache__/transforms.cpython-39.pyc b/seed2/__pycache__/transforms.cpython-39.pyc deleted file mode 100644 index d382609..0000000 Binary files a/seed2/__pycache__/transforms.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/blip2.cpython-311.pyc b/seed2/seed_qformer/__pycache__/blip2.cpython-311.pyc deleted file mode 100644 index a63b3a8..0000000 Binary files a/seed2/seed_qformer/__pycache__/blip2.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/blip2.cpython-39.pyc b/seed2/seed_qformer/__pycache__/blip2.cpython-39.pyc deleted file mode 100644 index ee7fee8..0000000 Binary files a/seed2/seed_qformer/__pycache__/blip2.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/clip_vit.cpython-311.pyc b/seed2/seed_qformer/__pycache__/clip_vit.cpython-311.pyc deleted file mode 100644 index be46ef8..0000000 Binary files a/seed2/seed_qformer/__pycache__/clip_vit.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/clip_vit.cpython-39.pyc b/seed2/seed_qformer/__pycache__/clip_vit.cpython-39.pyc deleted file mode 100644 index 7896670..0000000 Binary files a/seed2/seed_qformer/__pycache__/clip_vit.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/eva_vit.cpython-311.pyc 
b/seed2/seed_qformer/__pycache__/eva_vit.cpython-311.pyc deleted file mode 100644 index a18c00a..0000000 Binary files a/seed2/seed_qformer/__pycache__/eva_vit.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/eva_vit.cpython-39.pyc b/seed2/seed_qformer/__pycache__/eva_vit.cpython-39.pyc deleted file mode 100644 index ce8d519..0000000 Binary files a/seed2/seed_qformer/__pycache__/eva_vit.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/qformer_causual.cpython-311.pyc b/seed2/seed_qformer/__pycache__/qformer_causual.cpython-311.pyc deleted file mode 100644 index 63f9d43..0000000 Binary files a/seed2/seed_qformer/__pycache__/qformer_causual.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/qformer_causual.cpython-39.pyc b/seed2/seed_qformer/__pycache__/qformer_causual.cpython-39.pyc deleted file mode 100644 index 52d8596..0000000 Binary files a/seed2/seed_qformer/__pycache__/qformer_causual.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-311.pyc b/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-311.pyc deleted file mode 100644 index d658aa5..0000000 Binary files a/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-39.pyc b/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-39.pyc deleted file mode 100644 index 9cc89fb..0000000 Binary files a/seed2/seed_qformer/__pycache__/qformer_quantizer.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/utils.cpython-311.pyc b/seed2/seed_qformer/__pycache__/utils.cpython-311.pyc deleted file mode 100644 index 6c9b61f..0000000 Binary files a/seed2/seed_qformer/__pycache__/utils.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/utils.cpython-39.pyc b/seed2/seed_qformer/__pycache__/utils.cpython-39.pyc deleted 
file mode 100644 index d6eba02..0000000 Binary files a/seed2/seed_qformer/__pycache__/utils.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/vit.cpython-311.pyc b/seed2/seed_qformer/__pycache__/vit.cpython-311.pyc deleted file mode 100644 index 3ea1f20..0000000 Binary files a/seed2/seed_qformer/__pycache__/vit.cpython-311.pyc and /dev/null differ diff --git a/seed2/seed_qformer/__pycache__/vit.cpython-39.pyc b/seed2/seed_qformer/__pycache__/vit.cpython-39.pyc deleted file mode 100644 index 39ac18b..0000000 Binary files a/seed2/seed_qformer/__pycache__/vit.cpython-39.pyc and /dev/null differ diff --git a/seed2/seed_qformer/blip2.py b/seed2/seed_qformer/blip2.py index a438696..eed787e 100644 --- a/seed2/seed_qformer/blip2.py +++ b/seed2/seed_qformer/blip2.py @@ -35,7 +35,7 @@ def device(self): @classmethod def init_tokenizer(cls, truncation_side="right"): - tokenizer = BertTokenizer.from_pretrained("/mnt/petrelfs/zhanjun.p/mllm/models/bert-base-uncased", truncation_side=truncation_side) + tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased", truncation_side=truncation_side) tokenizer.add_special_tokens({"bos_token": "[DEC]"}) return tokenizer @@ -51,13 +51,13 @@ def maybe_autocast(self, dtype=torch.float16): @classmethod def init_Qformer(cls, num_query_token, vision_width, cross_attention_freq=2): - encoder_config = BertConfig.from_pretrained("/mnt/petrelfs/zhanjun.p/mllm/models/bert-base-uncased", ) + encoder_config = BertConfig.from_pretrained("google-bert/bert-base-uncased", ) encoder_config.encoder_width = vision_width # insert cross-attention layer every other block encoder_config.add_cross_attention = True encoder_config.cross_attention_freq = cross_attention_freq encoder_config.query_length = num_query_token - Qformer = BertLMHeadModel.from_pretrained("/mnt/petrelfs/zhanjun.p/mllm/models/bert-base-uncased", config=encoder_config) + Qformer = BertLMHeadModel.from_pretrained("google-bert/bert-base-uncased", 
config=encoder_config) query_tokens = nn.Parameter(torch.zeros(1, num_query_token, encoder_config.hidden_size)) query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range) return Qformer, query_tokens @@ -183,4 +183,3 @@ def forward(self, x: torch.Tensor): ret = super().forward(x.type(torch.float32)) return ret.type(orig_type) - diff --git a/soundstorm_speechtokenizer/__pycache__/__init__.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 4dfeea7..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/__init__.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 0d2dc45..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/attend.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/attend.cpython-310.pyc deleted file mode 100644 index 3c63d38..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/attend.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/attend.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/attend.cpython-39.pyc deleted file mode 100644 index 17c2d7f..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/attend.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/dataset.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/dataset.cpython-310.pyc deleted file mode 100644 index b9ef561..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/dataset.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/dataset.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/dataset.cpython-39.pyc deleted file mode 100644 index 62f1efd..0000000 Binary files 
a/soundstorm_speechtokenizer/__pycache__/dataset.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-310.pyc deleted file mode 100644 index ce7ce86..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-39.pyc deleted file mode 100644 index 07217e1..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/optimizer.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-310.pyc deleted file mode 100644 index 2497df7..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-39.pyc deleted file mode 100644 index 6f16a2d..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/soundstorm.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/tracking.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/tracking.cpython-310.pyc deleted file mode 100644 index 55b95ed..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/tracking.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/tracking.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/tracking.cpython-39.pyc deleted file mode 100644 index 504596c..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/tracking.cpython-39.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/trainer.cpython-310.pyc b/soundstorm_speechtokenizer/__pycache__/trainer.cpython-310.pyc deleted file mode 
100644 index 743975a..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/trainer.cpython-310.pyc and /dev/null differ diff --git a/soundstorm_speechtokenizer/__pycache__/trainer.cpython-39.pyc b/soundstorm_speechtokenizer/__pycache__/trainer.cpython-39.pyc deleted file mode 100644 index c74287e..0000000 Binary files a/soundstorm_speechtokenizer/__pycache__/trainer.cpython-39.pyc and /dev/null differ