update_gan scripts for benchmark (#500)

hysunflower · web-flow · commit 233d05d9b61d · 2021-11-29T19:19:22.000+08:00
* update_gan scripts for benchmark

* update_gan scripts for benchmark
diff --git a/benchmark/benchmark.yaml b/benchmark/benchmark.yaml
@@ -2,31 +2,31 @@ StyleGANv2:
   dataset_web: https://paddlegan.bj.bcebos.com/datasets/ffhq.tar
   config: configs/stylegan_v2_256_ffhq.yaml
   fp_item: fp32
-  bs_item: 3 8
+  bs_item: 8
   total_iters: 100
   log_interval: 5
 
 FOMM:
   dataset_web: https://paddlegan.bj.bcebos.com/datasets/fom_test_data.tar
   config: configs/firstorder_vox_256.yaml
   fp_item: fp32
-  bs_item: 8 16
+  bs_item: 16
   epochs: 1
   log_interval: 1
 
 esrgan:
   dataset_web: https://paddlegan.bj.bcebos.com/datasets/DIV2KandSet14.tar
   config: configs/esrgan_psnr_x4_div2k.yaml
   fp_item: fp32
-  bs_item: 32 64
+  bs_item: 32
   total_iters: 300
   log_interval: 10
 
 edvr:
   dataset: data/REDS
   config: configs/edvr_m_wo_tsa.yaml
   fp_item: fp32
-  bs_item: 4 64
+  bs_item: 4
   total_iters: 300
   log_interval: 10
 
diff --git a/benchmark/prepare.sh b/benchmark/prepare.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Prepare the benchmark environment: create a fresh per-run log directory,
+# expose python3.7/pip3.7 as `python`/`pip` through a private bin directory
+# placed first on PATH, and install the repository in editable mode.
+#
+# Exports: BENCHMARK_ROOT, TRAIN_LOG_DIR, PATH
+
+export BENCHMARK_ROOT=/workspace
+run_env="${BENCHMARK_ROOT}/run_env"
+log_date=$(date "+%Y.%m%d.%H%M%S")
+frame=paddle2.1.3
+cuda_version=10.2
+save_log_dir="${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/"
+
+# Start from an empty log directory if one with the same name already exists.
+if [[ -d "${save_log_dir}" ]]; then
+    rm -rf -- "${save_log_dir}"
+fi
+
+# Training logs are written below the per-run log directory.
+export TRAIN_LOG_DIR="${save_log_dir}/train_log"
+mkdir -p -- "${TRAIN_LOG_DIR}"
+log_path="${TRAIN_LOG_DIR}"
+
+################################# Configure python, e.g.:
+# Resolve the interpreter and pip once so every symlink uses the same path.
+python_bin=$(command -v python3.7)
+pip_bin=$(command -v pip3.7)
+rm -rf -- "${run_env:?}"
+mkdir -- "${run_env}"
+echo "${python_bin}"
+ln -s -- "${python_bin}m-config" "${run_env}/python3-config"
+ln -s -- "${python_bin}" "${run_env}/python"
+ln -s -- "${pip_bin}" "${run_env}/pip"
+
+export PATH="${run_env}:${PATH}"
+# Install only when the cd succeeded, so pip never runs in the wrong directory.
+cd "${BENCHMARK_ROOT}" && pip install -v -e .
diff --git a/benchmark/run_all.sh b/benchmark/run_all.sh
@@ -1,34 +1,6 @@
-
-#!usr/bin/env bash
+#!/usr/bin/env bash
 
-export BENCHMARK_ROOT=/workspace
-run_env=$BENCHMARK_ROOT/run_env
-log_date=`date "+%Y.%m%d.%H%M%S"`
-frame=paddle2.1.3
-cuda_version=10.2
-save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/
-
-if [[ -d ${save_log_dir} ]]; then
-    rm -rf ${save_log_dir}
-fi
-
-# this for update the log_path coding mat
-export TRAIN_LOG_DIR=${save_log_dir}/train_log
-mkdir -p ${TRAIN_LOG_DIR}
-log_path=${TRAIN_LOG_DIR}
-
-################################# 配置python, 如:
-rm -rf $run_env
-mkdir $run_env
-echo `which python3.7`
-ln -s $(which python3.7)m-config  $run_env/python3-config
-ln -s $(which python3.7) $run_env/python
-ln -s $(which pip3.7) $run_env/pip
-
-export PATH=$run_env:${PATH}
-cd $BENCHMARK_ROOT
-pip install -v -e .
-
+export log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
 
 function parse_yaml {
    local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$(echo @|tr @ '\034')
@@ -79,17 +51,18 @@ for model_mode in ${model_mode_list[@]}; do
       for fp_item in ${fp_item_list[@]}; do
           for bs_item in ${bs_list[@]}
             do
-            echo "index is speed, 1gpus, begin, ${model_name}"
+            echo "index is speed, 1gpus, begin, ${model_mode}"
             run_mode=sp
-            CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}  #  (5min)
+            # 2>&1 must come before the pipe so tee records the benchmark's stderr as well as its stdout.
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} 2>&1 | tee "${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_1gpus" #  (5min)
             sleep 60
-            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
+            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
             run_mode=mp
             basicvsr_name=basicvsr
             if [ ${model_mode} = ${basicvsr_name} ]; then
-                CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
+                CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} 2>&1 | tee "${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_4gpus4p"
             else
-                CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
+                CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} 2>&1 | tee "${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_8gpus8p"
             fi
             sleep 60
             done
diff --git a/benchmark/run_benchmark.sh b/benchmark/run_benchmark.sh
@@ -14,6 +14,15 @@ function _set_params(){
     run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR 后续QA设置该参数
     need_profile=${9:-"off"}
 
+    index=1
+    base_batch_size=${batch_size}
+    mission_name="图像生成"
+    direction_id=0
+    keyword="ips:"
+    keyword_loss="G_idt_A_loss:"
+    skip_steps=5
+    ips_unit="images/s"
+
 #   以下不用修改
     device=${CUDA_VISIBLE_DEVICES//,/ }
     arr=(${device})
@@ -23,9 +32,6 @@ function _set_params(){
     log_profile=${run_log_path}/${model_name}_model.profile
 }
 
-function _analysis_log(){
-    python benchmark/analysis_log.py ${model_name} ${log_file} ${res_log_file}
-}
 
 function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
@@ -65,9 +71,8 @@ function _train(){
         cp mylog/workerlog.0 ${log_file}
     fi
 
-    _analysis_log
-    
 }
 
+source ${BENCHMARK_ROOT}/scripts/run_model.sh # This script parses benchmark-compliant logs with analysis.py to extract performance data; for integration testing it can be downloaded from the benchmark repo at https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh; if you are not doing integration testing and only want the training log, you may comment this line out, but it must be enabled when submitting
 _set_params $@
-_train
+_run