File tree 2 files changed +18
-18
lines changed
2 files changed +18
-18
lines changed Original file line number Diff line number Diff line change @@ -368,7 +368,7 @@ def run_checks(self, args):
368
368
369
369
if args .check_train_result_json :
370
370
# 检查 train_result.json 内容
371
- train_result_json = os .path .join (output_dir , "train_results .json" )
371
+ train_result_json = os .path .join (output_dir , "train_result .json" )
372
372
check_weights_items = args .check_weights_items
373
373
check_train_json_message = []
374
374
check_train_json_flag , check_train_json_message = self .check_train_json_content (
Original file line number Diff line number Diff line change @@ -52,26 +52,26 @@ function run_command(){
52
52
printf " \e[32m|%-20s| %-50s | %-20s\n\e[0m" " [${time_stamp} ]" " ${command} "
53
53
eval $command
54
54
last_status=${PIPESTATUS[0]}
55
- if [[ $MODE != ' PaddleX' ]]; then
56
- n=1
57
- # Try 3 times to run command if it fails
58
- while [[ $last_status != 0 ]]; do
59
- sleep 10
60
- n=` expr $n + 1`
61
- printf " \e[32m|%-20s| %-50s | %-20s\n\e[0m" " [${time_stamp} ]" " ${command} "
62
- eval $command
63
- last_status=${PIPESTATUS[0]}
64
- if [[ $n -eq 3 && $last_status != 0 ]]; then
65
- echo " Retry 3 times failed with command: ${command} "
55
+ n=1
56
+ # Try 2 times to run command if it fails
57
+ while [[ $last_status != 0 ]]; do
58
+ sleep 10
59
+ n=` expr $n + 1`
60
+ printf " \e[32m|%-20s| %-50s | %-20s\n\e[0m" " [${time_stamp} ]" " ${command} "
61
+ sync
62
+ echo 1 > /proc/sys/vm/drop_caches
63
+ eval $command
64
+ last_status=${PIPESTATUS[0]}
65
+ if [[ $n -eq 2 && $last_status != 0 ]]; then
66
+ if [[ $MODE != ' PaddleX' ]]; then
67
+ echo " Retry 2 times failed with command: ${command} "
66
68
exit 1
69
+ else
70
+ failed_cmd_list=" $failed_cmd_list \n ${module_name} | command: ${command} "
71
+ echo " Run ${command} failed"
67
72
fi
68
- done
69
- else
70
- if [[ $last_status != 0 ]]; then
71
- failed_cmd_list=" $failed_cmd_list \n ${module_name} | command: ${command} "
72
- echo " Run ${command} failed"
73
73
fi
74
- fi
74
+ done
75
75
}
76
76
77
77
# 准备数据集并做数据校验
You can’t perform that action at this time.
0 commit comments