【hold 】Test gan 0105 #1254

Open · wants to merge 3 commits into master

19 changes: 7 additions & 12 deletions OtherFrame/gan/PyTorch/fomm/README.md
@@ -40,20 +40,15 @@ bash run_PyTorch.sh; # create the container and test the model in this standard environment
ImageName="registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7";
docker pull ${ImageName}

#<<<<<<< gan_benchmark
#run_cmd="cd /workspace;
# cp /workspace/scripts/PrepareEnv.sh ./;
# bash PrepareEnv.sh;
# cd /workspace/first-order-model/;
run_cmd="cp /workspace/scripts/PrepareEnv.sh ./;
bash PrepareEnv.sh;
cd /workspace/models/fomm;
cp /workspace/scripts/run_benchmark.sh ./;
cp /workspace/scripts/analysis_log.py ./;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh fomm_sp_bs8 sp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh fomm_sp_bs16 sp fp32 16 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh fomm_mp_bs32 mp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh fomm_mp_bs64 mp fp32 16 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh FOMM_sp_bs8 sp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh FOMM_sp_bs16 sp fp32 16 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh FOMM_mp_bs32 mp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh FOMM_mp_bs64 mp fp32 16 300 4;
"

nvidia-docker run --name test_torch_gan -i \
@@ -67,12 +62,12 @@ nvidia-docker rm test_torch_gan

## Output

After the run finishes, files containing the model's training performance data, such as `fomm_sp_bs8_fp32_1_speed`, are produced in the current directory; their contents look like the following.
After the run finishes, files containing the model's training performance data, such as `FOMM_sp_bs8_fp32_1_speed`, are produced in the current directory; their contents look like the following.

```bash
{
"log_file": "/workspace/models/fomm/fomm_sp_bs8_fp32_1", \ # log 目录,创建规范见PrepareEnv.sh
"model_name": "fomm_sp_bs8", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item}
"log_file": "/workspace/models/fomm/FOMM_sp_bs8_fp32_1", \ # log 目录,创建规范见PrepareEnv.sh
"model_name": "FOMM_sp_bs8", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item}
"mission_name": "图像生成", \ # 模型case所属任务名称,具体可参考scripts/config.ini
"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini
"run_mode": "sp", \ # 单卡:sp|多卡:mp
24 changes: 13 additions & 11 deletions OtherFrame/gan/PyTorch/fomm/run_PyTorch.sh
@@ -3,27 +3,29 @@
ImageName="registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7";
docker pull ${ImageName}

#<<<<<<< gan_benchmark
#run_cmd="cd /workspace/;
# cp /workspace/scripts/PrepareEnv.sh ./;
# bash PrepareEnv.sh;
# cd /workspace/first-order-model;

run_cmd="cp /workspace/scripts/PrepareEnv.sh ./;
bash PrepareEnv.sh;
cd /workspace/models/fomm;
cp /workspace/scripts/run_benchmark.sh ./;
cp /workspace/scripts/analysis_log.py ./;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh fomm sp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh fomm sp fp32 16 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh fomm mp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh fomm mp fp32 16 300 4;
sed -i '/set\ -xe/d' benchmark/run_benchmark.sh
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh FOMM sp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh FOMM sp fp32 16 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh FOMM mp fp32 8 300 4;
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh FOMM mp fp32 16 300 4;
"

nvidia-docker run --name test_torch_gan -it \
nvidia-docker run --name test_torch_gan -i \
--net=host \
--shm-size=128g \
-v $PWD:/workspace \
-v /ssd3:/ssd3 \
-v /ssd2:/ssd2 \
-e "ALL_PATH=${all_path}" \
-v "BENCHMARK_ROOT=/workspace" \
-e "http_proxy=${http_proxy}" \
-e "https_proxy=${http_proxy}" \
-e "no_proxy=bcebos.com" \
${ImageName} /bin/bash -c "${run_cmd}"

nvidia-docker stop test_torch_gan
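
For reference, the `sed -i '/set\ -xe/d'` line added above just deletes the `set -xe` line from the targeted benchmark script, so one failing step inside it no longer aborts the remaining cases. A minimal, self-contained illustration (the file name is made up):

```bash
# Illustration only: the sed pattern used above removes the `set -xe` line.
cat > demo_run_benchmark.sh <<'EOF'
#!/bin/bash
set -xe
echo "benchmark case"
EOF
sed -i '/set\ -xe/d' demo_run_benchmark.sh
cat demo_run_benchmark.sh   # `set -xe` is gone; errors no longer abort the script
```
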
8 changes: 2 additions & 6 deletions OtherFrame/gan/PyTorch/fomm/scripts/PrepareEnv.sh
@@ -5,10 +5,7 @@ echo "*******prepare benchmark***********"

################################# create some log directories, e.g.:
export BENCHMARK_ROOT=/workspace
log_date=`date "+%Y.%m%d.%H%M%S"`
frame=pytorch1.0.0
cuda_version=10.2
save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/
save_log_dir=${BENCHMARK_ROOT}/logs/

if [[ -d ${save_log_dir} ]]; then
rm -rf ${save_log_dir}
@@ -31,9 +28,8 @@ export PATH=/workspace/run_env:${PATH}
pip install -U pip
echo `pip --version`

git clone https://github.com/lzzyzlbb/first-order-model
cd /workspace/models/fomm
git checkout add_log
cd first-order-model
pip install -r requirements.txt
imageio_download_bin ffmpeg

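
The PrepareEnv.sh change above drops the timestamped log directory in favour of a fixed `${BENCHMARK_ROOT}/logs/` path; with the guard shown in the hunk, logs from a previous run are discarded. A minimal sketch of the assumed reset pattern (the `train_log` subdirectory is an assumption, based on auto_run.sh later copying from `logs/train_log`):

```bash
# Sketch under stated assumptions; paths are illustrative.
export BENCHMARK_ROOT=/workspace
save_log_dir=${BENCHMARK_ROOT}/logs/

if [[ -d ${save_log_dir} ]]; then
    rm -rf ${save_log_dir}          # fixed path: previous logs are wiped on each run
fi
mkdir -p ${save_log_dir}/train_log  # assumed: auto_run.sh copies logs/train_log/* afterwards
```
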
2 changes: 1 addition & 1 deletion OtherFrame/gan/PyTorch/fomm/scripts/analysis_log.py
@@ -28,7 +28,7 @@ def analyze(model_name, log_file, res_log_file):
total_time = 0
for i in range(skip_num, len(time_res)):
total_time += float(time_res[i])
ips = total_time / (len(time_res) - skip_num)
ips = round(total_time / (len(time_res) - skip_num), 3)

info = {"log_file": log_file, "model_name": model_name, "mission_name": "图像生成",
"direction_id": 0, "run_mode": run_mode, "index": 1, "gpu_num": gpu_num,
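
The analysis_log.py change above only rounds the averaged per-iteration time to three decimals before it is written into the speed file. A rough bash/awk equivalent of that skip-and-average step, with made-up numbers:

```bash
# Skip the first `skip_num` timings, average the rest, round to 3 decimals.
skip_num=2
time_res="0.98 1.02 0.50 0.51 0.49 0.50"   # illustrative per-iteration times
echo ${time_res} | tr ' ' '\n' | \
    awk -v skip=${skip_num} 'NR > skip { sum += $1; n++ } END { printf "%.3f\n", sum / n }'
```
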
9 changes: 5 additions & 4 deletions OtherFrame/gan/PyTorch/fomm/scripts/run_benchmark.sh
@@ -3,10 +3,10 @@ set -xe

# Test training benchmark for a model.

# Usage: CUDA_VISIBLE_DEVICES=xxx bash run_benchmark.sh ${model_name} ${run_mode} ${fp_item} ${bs_item} ${max_iter} ${num_workers}
# Usage: CUDA_VISIBLE_DEVICES=xxx bash run_benchmark.sh ${model_item} ${run_mode} ${fp_item} ${bs_item} ${max_iter} ${num_workers}

function _set_params(){
model_name=${1:-"model_name"}
model_item=${1:-"model_item"}
run_mode=${2:-"sp"} # sp or mp
fp_item=${3:-"fp32"} # fp32 or fp16
batch_size=${4:-"2"}
@@ -17,8 +17,9 @@ function _set_params(){
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed
log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed
model_name=${model_item}_bs${batch_size}_${fp_item}
}

function _analysis_log(){
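
The renamed parameter above separates `model_item` (what the caller passes, e.g. FOMM) from the derived `model_name` that ends up in the speed file. A minimal sketch of how the names in `_set_params` compose, with illustrative values:

```bash
# Illustrative values; mirrors the assignments shown in the hunk above.
model_item=FOMM
run_mode=sp
fp_item=fp32
batch_size=8
run_log_path=.
CUDA_VISIBLE_DEVICES=0

device=${CUDA_VISIBLE_DEVICES//,/ }   # "0,1" -> "0 1"
arr=(${device})
num_gpu_devices=${#arr[*]}            # number of visible GPUs

log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
res_log_file=${log_file}_speed
model_name=${model_item}_bs${batch_size}_${fp_item}

echo ${log_file}      # ./FOMM_sp_bs8_fp32_1
echo ${res_log_file}  # ./FOMM_sp_bs8_fp32_1_speed
echo ${model_name}    # FOMM_bs8_fp32
```
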
33 changes: 20 additions & 13 deletions OtherFrame/gan/PyTorch/mmedting/run_PyTorch.sh
@@ -9,25 +9,32 @@ run_cmd="cp /workspace/scripts/PrepareEnv.sh ./;
cp -r /workspace/mmedi_benchmark_configs ./;
cp /workspace/scripts/run_benchmark.sh ./;
cp /workspace/scripts/analysis_log.py ./;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_sp_bs32 sp fp32 32 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_sp_bs64 sp fp32 64 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_mp_bs32 mp fp32 32 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_mp_bs64 mp fp32 64 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_sp_bs4 sp fp32 4 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_sp_bs64 sp fp32 64 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_mp_bs4 mp fp32 4 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_mp_bs64 mp fp32 64 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_sp_bs2 sp fp32 2 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_sp_bs4 sp fp32 4 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_mp_bs2 mp fp32 2 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_mp_bs4 mp fp32 4 300 4;
sed -i '/set\ -xe/d' run_benchmark.sh
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_bs32_fp32 sp fp32 32 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_bs32_fp32 mp fp32 32 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_bs4_fp32 sp fp32 4 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_bs4_fp32 mp fp32 4 300 3;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_bs2_fp32 sp fp32 2 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh basicvsr_bs4_fp32 sp fp32 4 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_bs2_fp32 mp fp32 2 300 4;
PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3 bash run_benchmark.sh basicvsr_bs4_fp32 mp fp32 4 300 4;
"
#PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh esrgan_bs64_fp32 sp fp32 64 300 4;
#PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh esrgan_bs64_fp32 mp fp32 64 300 4;
#PORT=23335 CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh edvr_bs64_fp32 sp fp32 64 300 3;
#PORT=23335 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh edvr_bs64_fp32 mp fp32 64 300 3;

nvidia-docker run --name test_torch_gan -i \
--net=host \
--shm-size=128g \
-v $PWD:/workspace \
-v /ssd2:/ssd2 \
-e "ALL_PATH=${all_path}" \
-v "BENCHMARK_ROOT=/workspace" \
-e "http_proxy=${http_proxy}" \
-e "https_proxy=${http_proxy}" \
-e "no_proxy=bcebos.com" \
${ImageName} /bin/bash -c "${run_cmd}"

nvidia-docker stop test_torch_gan
nvidia-docker rm test_torch_gan
nvidia-docker rm test_torch_gan
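
One note on the container flags used in both run_PyTorch.sh scripts: `-e` injects an environment variable into the container, while `-v` mounts a host path, so `BENCHMARK_ROOT=/workspace` belongs behind `-e`. A minimal reference sketch, unrelated to the PR's images:

```bash
# Reference only: -e sets an environment variable inside the container,
# -v mounts a host directory. Image and paths are illustrative.
docker run --rm \
    -e "BENCHMARK_ROOT=/workspace" \
    -v "$PWD:/workspace" \
    ubuntu:20.04 /bin/bash -c 'echo $BENCHMARK_ROOT; ls /workspace'
```
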
5 changes: 1 addition & 4 deletions OtherFrame/gan/PyTorch/mmedting/scripts/PrepareEnv.sh
@@ -5,10 +5,7 @@ echo "*******prepare benchmark***********"

################################# create some log directories, e.g.:
export BENCHMARK_ROOT=/workspace
log_date=`date "+%Y.%m%d.%H%M%S"`
frame=pytorch1.9.0
cuda_version=10.2
save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/
save_log_dir=${BENCHMARK_ROOT}/logs/

if [[ -d ${save_log_dir} ]]; then
rm -rf ${save_log_dir}
22 changes: 12 additions & 10 deletions OtherFrame/gan/PyTorch/mmedting/scripts/run_benchmark.sh
@@ -17,8 +17,8 @@ function _set_params(){
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_name}_${fp_item}_${num_gpu_devices}_speed
log_file=${run_log_path}/${model_name}_${num_gpu_devices}_${run_mode}
res_log_file=${run_log_path}/${model_name}_${num_gpu_devices}_${run_mode}_speed
}

function _analysis_log(){
@@ -30,18 +30,20 @@ function _analysis_log(){
function _train(){
echo "Train ${model_name} on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

train_config="mmedi_benchmark_configs/${model_name}.py"


train_config="mmedi_benchmark_configs/${model_name%%_*}_${run_mode}_bs${batch_size}.py"
train_options="--no-validate "

case ${run_mode} in
sp) train_cmd="./tools/dist_train.sh ${train_config} 1 ${train_options}" ;;
mp)
case ${model_name} in
basicvsr_mp_bs2|basicvsr_mp_bs4) train_cmd="./tools/dist_train.sh ${train_config} 4 ${train_options}" ;;
*) train_cmd="./tools/dist_train.sh ${train_config} 8 ${train_options}"
esac
;;
if [ ${model_name} = "basicvsr_bs2_fp32" ] || [ ${model_name} = "basicvsr_bs4_fp32" ]; then
train_cmd="./tools/dist_train.sh ${train_config} 4 ${train_options}"
else
train_cmd="./tools/dist_train.sh ${train_config} 8 ${train_options}"
fi
;;
*) echo "choose run_mode(sp or mp)"; exit 1;
esac

@@ -64,4 +66,4 @@ function _train(){
}

_set_params $@
_train
_train
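
The reworked `train_config` line above derives the mmediting config file from the case name via bash parameter expansion: `${model_name%%_*}` keeps only the part before the first underscore. A small sketch using one of the case names passed in run_PyTorch.sh:

```bash
# Illustrative: how the config path is assembled for one of the cases above.
model_name=esrgan_bs32_fp32
run_mode=sp
batch_size=32

train_config="mmedi_benchmark_configs/${model_name%%_*}_${run_mode}_bs${batch_size}.py"
echo ${train_config}   # mmedi_benchmark_configs/esrgan_sp_bs32.py
```
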
18 changes: 17 additions & 1 deletion OtherFrame/scripts/auto_run.sh
@@ -57,7 +57,7 @@ function set_env(){



cur_torch_list=(clas_model_torch seg_model_torch speech_model_torch detec_torch_jde-fairmot detec_torch_fast)
cur_torch_list=(clas_model_torch seg_model_torch speech_model_torch detec_torch_jde-fairmot detec_torch_fast gan_torch_models)
cur_mxnet_list=()
cur_tensorflow_list=()

@@ -113,6 +113,22 @@ detec_torch_fast(){
cp models/SOLO/*fp32_8 ${TRAIN_LOG_DIR}
}

gan_torch_models(){
# FOMM
cur_model_path=${ROOT_DIR}/gan/PyTorch/fomm
cd ${cur_model_path}
bash run_PyTorch.sh
cp ${cur_model_path}/logs/train_log/* ${TRAIN_LOG_DIR}
cp ${cur_model_path}/*speed ${LOG_PATH_INDEX_DIR}

# edvr basicvsr esrgan
cur_model_path=${ROOT_DIR}/gan/PyTorch/mmedting
cd ${cur_model_path}
bash run_PyTorch.sh
cp ${cur_model_path}/*speed ${LOG_PATH_INDEX_DIR}
cp ${cur_model_path}/*sp ${TRAIN_LOG_DIR}
cp ${cur_model_path}/*mp ${TRAIN_LOG_DIR}
}
set_env
for model_name in ${cur_torch_list[@]}
do
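
Adding `gan_torch_models` to `cur_torch_list` is what makes the new function run: the loop at the end of auto_run.sh (truncated here) presumably invokes each list entry as the shell function of the same name. A minimal sketch of that assumed dispatch pattern:

```bash
# Assumed dispatch pattern, not the literal loop body from auto_run.sh.
cur_torch_list=(gan_torch_models)

gan_torch_models(){
    echo "run GAN benchmarks here"
}

for model_name in ${cur_torch_list[@]}
do
    ${model_name}   # each list entry names a function defined above
done
```
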