Skip to content

Commit 321af5a

Browse files
authored
Merge branch 'PaddlePaddle:develop' into develop
2 parents e0da903 + afa3d23 commit 321af5a

10 files changed

+15004
-21
lines changed

data/k400/train_small_frames.list

+7,488
Large diffs are not rendered by default.

data/k400/train_small_videos.list

+7,488
Large diffs are not rendered by default.

paddlevideo/tasks/train.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,14 @@ def train_model(cfg,
4040
4141
Args:
4242
cfg (dict): configuration.
43-
weights (str): weights path for finetuning.
44-
parallel (bool): Whether multi-cards training. Default: True.
45-
validate (bool): Whether to do evaluation. Default: False.
46-
amp (bool): Whether to use automatic mixed precision during training. Default: False.
47-
use_fleet (bool):
48-
profiler_options (str): Activate the profiler function Default: None.
43+
weights (str, optional): weights path for finetuning. Defaults to None.
44+
parallel (bool, optional): whether multi-cards training. Defaults to True.
45+
validate (bool, optional): whether to do evaluation. Defaults to True.
46+
amp (bool, optional): whether to use automatic mixed precision during training. Defaults to False.
47+
max_iters (int, optional): max running iters in an epoch. Defaults to None.
48+
use_fleet (bool, optional): whether to use fleet. Defaults to False.
49+
profiler_options (str, optional): configuration for the profiler function. Defaults to None.
50+
4951
"""
5052
if use_fleet:
5153
fleet.init(is_collective=True)
@@ -193,8 +195,9 @@ def train_model(cfg,
193195
scaler.minimize(optimizer, scaled)
194196
optimizer.clear_grad()
195197
else: # general case
196-
# 4.2 backward
198+
# Loss scaling
197199
scaled = scaler.scale(avg_loss)
200+
# 4.2 backward
198201
scaled.backward()
199202
# 4.3 minimize
200203
scaler.minimize(optimizer, scaled)
@@ -354,10 +357,10 @@ def evaluate(best):
354357
save(
355358
optimizer.state_dict(),
356359
osp.join(output_dir,
357-
model_name + f"_epoch_{epoch+1:05d}.pdopt"))
360+
model_name + f"_epoch_{epoch + 1:05d}.pdopt"))
358361
save(
359362
model.state_dict(),
360363
osp.join(output_dir,
361-
model_name + f"_epoch_{epoch+1:05d}.pdparams"))
364+
model_name + f"_epoch_{epoch + 1:05d}.pdparams"))
362365

363366
logger.info(f'training {model_name} finished')

test_tipc/benchmark_train.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ func_sed_params "$FILENAME" "${line_export_py_2}" "null"
134134
func_sed_params "$FILENAME" "${line_export_py_3}" "null"
135135
func_sed_params "$FILENAME" "${line_python}" "$python"
136136

137-
# 末尾加上--log_interval=1,以便输出足量数据
138-
set_log_interval_cmd="sed -i '${line_norm_train}s/.*/& -o log_interval=1/' '${filename}'"
137+
# Append "--max_iters=30 -o log_interval=1" to the norm_train line so the run is bounded and logs enough data
138+
set_log_interval_cmd="sed -i '${line_norm_train}s/.*/& --max_iters=30 -o log_interval=1/' '${filename}'"
139139
eval $set_log_interval_cmd
140140

141141
# 去掉--validate,benchmark不需要validate

test_tipc/configs/AGCN/AGCN_train_infer_python.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ train_infer_video_dir:null
1313
null:null
1414
##
1515
trainer:norm_train
16-
norm_train:main.py -c configs/recognition/agcn/agcn_fsd.yaml --seed 1234 --max_iters=10 -o DATASET.train.file_path="data/fsd10/FSD_train_data.npy" -o DATASET.train.label_path="data/fsd10/FSD_train_label.npy" -o DATASET.test.file_path="data/fsd10/FSD_train_data.npy"
16+
norm_train:main.py -c configs/recognition/agcn/agcn_fsd.yaml --seed 1234 -o DATASET.train.file_path="data/fsd10/FSD_train_data.npy" -o DATASET.train.label_path="data/fsd10/FSD_train_label.npy" -o DATASET.test.file_path="data/fsd10/FSD_train_data.npy"
1717
pact_train:null
1818
fpgm_train:null
1919
distill_train:null

test_tipc/configs/BMN/BMN_train_infer_python.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ train_model_name:null
1313
-o DATASET.train.file_path:null
1414
##
1515
trainer:norm_train
16-
norm_train:main.py --validate -c configs/localization/bmn.yaml --seed 1234 --max_iters=30
16+
norm_train:main.py --validate -c configs/localization/bmn.yaml --seed 1234
1717
pact_train:null
1818
fpgm_train:null
1919
distill_train:null

test_tipc/configs/STGCN/STGCN_train_infer_python.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ train_infer_video_dir:null
1313
null:null
1414
##
1515
trainer:norm_train
16-
norm_train:main.py -c configs/recognition/stgcn/stgcn_fsd.yaml --seed 1234 --max_iters=10 -o DATASET.train.file_path="data/fsd10/FSD_train_data.npy" -o DATASET.train.label_path="data/fsd10/FSD_train_label.npy" -o DATASET.test.file_path="data/fsd10/FSD_train_data.npy"
16+
norm_train:main.py -c configs/recognition/stgcn/stgcn_fsd.yaml --seed 1234 -o DATASET.train.file_path="data/fsd10/FSD_train_data.npy" -o DATASET.train.label_path="data/fsd10/FSD_train_label.npy" -o DATASET.test.file_path="data/fsd10/FSD_train_data.npy"
1717
pact_train:null
1818
fpgm_train:null
1919
distill_train:null

test_tipc/configs/SlowFast/SlowFast_train_infer_python.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ train_infer_video_dir:null
1313
-o DATASET.train.file_path:'data/k400/train_small_videos.list' -o DATASET.valid.file_path='data/k400/train_small_videos.list' -o DATASET.test.file_path='data/k400/train_small_videos.list'
1414
##
1515
trainer:norm_train
16-
norm_train:main.py --validate -c configs/recognition/slowfast/slowfast.yaml --seed 1234 --max_iters=30
16+
norm_train:main.py --validate -c configs/recognition/slowfast/slowfast.yaml --seed 1234
1717
pact_train:null
1818
fpgm_train:null
1919
distill_train:null

test_tipc/docs/test_train_inference_python.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
112112
python3.7 test_tipc/compare_results.py --gt_file="test_tipc/results/python_*.txt" --log_file="test_tipc/output/python_*.log" --atol=1e-3 --rtol=1e-3
113113
```
114114

115-
参数介绍:
115+
参数介绍:
116116
- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/results/ 文件夹下
117117
- log_file: 指向运行test_tipc/test_train_inference_python.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:预测文本,类别等等,同样支持python_infer_*.log格式传入
118118
- atol: 设置的绝对误差

test_tipc/test_train_inference_python.sh

+9-5
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,16 @@ function func_inference(){
145145
for use_gpu in ${use_gpu_list[*]}; do
146146
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
147147
for use_mkldnn in ${use_mkldnn_list[*]}; do
148-
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
148+
if [[ ${use_mkldnn} = "False" ]] && [[ ${_flag_quant} = "True" ]]; then
149149
continue
150150
fi
151151
for threads in ${cpu_threads_list[*]}; do
152152
for batch_size in ${batch_size_list[*]}; do
153153
for precision in ${precision_list[*]}; do
154-
if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
154+
if [[ ${use_mkldnn} = "False" ]] && [[ ${precision} = "fp16" ]]; then
155155
continue
156156
fi # skip when enable fp16 but disable mkldnn
157-
if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
157+
if [[ ${_flag_quant} = "True" ]] && [[ ${precision} != "int8" ]]; then
158158
continue
159159
fi # skip when quant model inference but precision is not int8
160160
set_precision=$(func_set_params "${precision_key}" "${precision}")
@@ -185,10 +185,10 @@ function func_inference(){
185185
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
186186
continue
187187
fi
188-
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
188+
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [[ ${use_trt} = "False" ]]; then
189189
continue
190190
fi
191-
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
191+
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
192192
continue
193193
fi
194194
for batch_size in ${batch_size_list[*]}; do
@@ -309,6 +309,10 @@ else
309309
run_export=${export_value2}
310310
else
311311
run_train=${norm_trainer}
312+
if [[ ${MODE} != "benchmark_train" ]] && [[ ! ${MODE} =~ "whole_train" ]]; then
313+
# Append --max_iters=30 to the training command (skipped for benchmark_train and whole_train modes)
314+
run_train=${run_train}" --max_iters=30"
315+
fi
312316
run_export=${norm_export}
313317
fi
314318

0 commit comments

Comments
 (0)