Skip to content

Commit 69ba2b9

Browse files
zhangyubo0722 and zhangyubo0722 authored
add ocrv5 mobile rec yaml (#15142)
Co-authored-by: zhangyubo0722 <zangyubo0722@163.com>
1 parent a836921 commit 69ba2b9

File tree

2 files changed

+141
-1
lines changed

2 files changed

+141
-1
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
Global:
2+
model_name: PP-OCRv5_mobile_rec # To use static model for inference.
3+
debug: false
4+
use_gpu: true
5+
epoch_num: 75
6+
log_smooth_window: 20
7+
print_batch_step: 10
8+
save_model_dir: ./output/PP-OCRv5_mobile_rec
9+
save_epoch_step: 10
10+
eval_batch_step: [0, 2000]
11+
cal_metric_during_train: true
12+
pretrained_model:
13+
checkpoints:
14+
save_inference_dir:
15+
use_visualdl: false
16+
infer_img: doc/imgs_words/ch/word_1.jpg
17+
character_dict_path: ./ppocr/utils/dict/ppocrv5_dict.txt
18+
max_text_length: &max_text_length 25
19+
infer_mode: false
20+
use_space_char: true
21+
distributed: true
22+
save_res_path: ./output/rec/predicts_ppocrv5.txt
23+
d2s_train_image_shape: [3, 48, 320]
24+
25+
26+
Optimizer:
27+
name: Adam
28+
beta1: 0.9
29+
beta2: 0.999
30+
lr:
31+
name: Cosine
32+
learning_rate: 0.0005
33+
warmup_epoch: 5
34+
regularizer:
35+
name: L2
36+
factor: 3.0e-05
37+
38+
39+
Architecture:
40+
model_type: rec
41+
algorithm: SVTR_LCNet
42+
Transform:
43+
Backbone:
44+
name: PPLCNetV3
45+
scale: 0.95
46+
Head:
47+
name: MultiHead
48+
head_list:
49+
- CTCHead:
50+
Neck:
51+
name: svtr
52+
dims: 120
53+
depth: 2
54+
hidden_dims: 120
55+
kernel_size: [1, 3]
56+
use_guide: True
57+
Head:
58+
fc_decay: 0.00001
59+
- NRTRHead:
60+
nrtr_dim: 384
61+
max_text_length: *max_text_length
62+
63+
Loss:
64+
name: MultiLoss
65+
loss_config_list:
66+
- CTCLoss:
67+
- NRTRLoss:
68+
69+
PostProcess:
70+
name: CTCLabelDecode
71+
72+
Metric:
73+
name: RecMetric
74+
main_indicator: acc
75+
76+
Train:
77+
dataset:
78+
name: MultiScaleDataSet
79+
ds_width: false
80+
data_dir: ./train_data/
81+
ext_op_transform_idx: 1
82+
label_file_list:
83+
- ./train_data/train_list.txt
84+
transforms:
85+
- DecodeImage:
86+
img_mode: BGR
87+
channel_first: false
88+
- RecConAug:
89+
prob: 0.5
90+
ext_data_num: 2
91+
image_shape: [48, 320, 3]
92+
max_text_length: *max_text_length
93+
- RecAug:
94+
- MultiLabelEncode:
95+
gtc_encode: NRTRLabelEncode
96+
- KeepKeys:
97+
keep_keys:
98+
- image
99+
- label_ctc
100+
- label_gtc
101+
- length
102+
- valid_ratio
103+
sampler:
104+
name: MultiScaleSampler
105+
scales: [[320, 32], [320, 48], [320, 64]]
106+
first_bs: &bs 128
107+
fix_bs: false
108+
divided_factor: [8, 16] # w, h
109+
is_training: True
110+
loader:
111+
shuffle: true
112+
batch_size_per_card: *bs
113+
drop_last: true
114+
num_workers: 8
115+
Eval:
116+
dataset:
117+
name: SimpleDataSet
118+
data_dir: ./train_data
119+
label_file_list:
120+
- ./train_data/val_list.txt
121+
transforms:
122+
- DecodeImage:
123+
img_mode: BGR
124+
channel_first: false
125+
- MultiLabelEncode:
126+
gtc_encode: NRTRLabelEncode
127+
- RecResizeImg:
128+
image_shape: [3, 48, 320]
129+
- KeepKeys:
130+
keep_keys:
131+
- image
132+
- label_ctc
133+
- label_gtc
134+
- length
135+
- valid_ratio
136+
loader:
137+
shuffle: false
138+
drop_last: false
139+
batch_size_per_card: 128
140+
num_workers: 4

configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ Train:
99 99
sampler:
100 100
name: MultiScaleSampler
101 101
scales: [[320, 32], [320, 48], [320, 64]]
102-
first_bs: &bs 64
102+
first_bs: &bs 128
103 103
fix_bs: false
104 104
divided_factor: [8, 16] # w, h
is_training: True

0 commit comments

Comments
 (0)