Skip to content

Commit be32083

Browse files
Modify YAML configs to share values via Global anchors and aliases
1 parent c53e761 commit be32083

File tree

4 files changed

+34
-27
lines changed

4 files changed

+34
-27
lines changed

configs/rec/PP-FormuaNet/rec_pp_formulanet_l.yml

+11-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ Global:
1515
infer_img: doc/datasets/pme_demo/0000013.png
1616
infer_mode: False
1717
use_space_char: False
18-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
18+
rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
19+
max_new_tokens: &max_new_tokens 1024
20+
input_size: &input_size [768, 768]
1921
save_res_path: ./output/rec/predicts_unimernet_latexocr.txt
2022
allow_resize_largeImg: False
2123
start_ema: True
@@ -43,7 +45,7 @@ Architecture:
4345
encoder_global_attn_indexes: [2, 5, 8, 11]
4446
Head:
4547
name: PPFormulaNet_Head
46-
max_new_tokens: 1024
48+
max_new_tokens: *max_new_tokens
4749
decoder_start_token_id: 0
4850
decoder_ffn_dim: 2048
4951
decoder_hidden_size: 512
@@ -62,7 +64,7 @@ Loss:
6264

6365
PostProcess:
6466
name: UniMERNetDecode
65-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
67+
rec_char_dict_path: *rec_char_dict_path
6668

6769
Metric:
6870
name: LaTeXOCRMetric
@@ -76,12 +78,12 @@ Train:
7678
label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
7779
transforms:
7880
- UniMERNetImgDecode:
79-
input_size: [768, 768]
81+
input_size: *input_size
8082
- UniMERNetTrainTransform:
8183
- LatexImageFormat:
8284
- UniMERNetLabelEncode:
83-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
84-
max_seq_len: 1024
85+
rec_char_dict_path: *rec_char_dict_path
86+
max_seq_len: *max_new_tokens
8587
- KeepKeys:
8688
keep_keys: ['image', 'label', 'attention_mask']
8789

@@ -99,12 +101,12 @@ Eval:
99101
label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
100102
transforms:
101103
- UniMERNetImgDecode:
102-
input_size: [768, 768]
104+
input_size: *input_size
103105
- UniMERNetTestTransform:
104106
- LatexImageFormat:
105107
- UniMERNetLabelEncode:
106-
max_seq_len: 1024
107-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
108+
max_seq_len: *max_new_tokens
109+
rec_char_dict_path: *rec_char_dict_path
108110
- KeepKeys:
109111
keep_keys: ['image', 'label', 'attention_mask', 'filename']
110112
loader:

configs/rec/PP-FormuaNet/rec_pp_formulanet_s.yml

+11-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ Global:
1515
infer_img: doc/datasets/pme_demo/0000013.png
1616
infer_mode: False
1717
use_space_char: False
18-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
18+
rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
19+
max_new_tokens: &max_new_tokens 1024
20+
input_size: &input_size [384, 384]
1921
save_res_path: ./output/rec/predicts_unimernet_latexocr.txt
2022
allow_resize_largeImg: False
2123
start_ema: True
@@ -40,7 +42,7 @@ Architecture:
4042

4143
Head:
4244
name: PPFormulaNet_Head
43-
max_new_tokens: 1024
45+
max_new_tokens: *max_new_tokens
4446
decoder_start_token_id: 0
4547
decoder_ffn_dim: 1536
4648
decoder_hidden_size: 384
@@ -60,7 +62,7 @@ Loss:
6062

6163
PostProcess:
6264
name: UniMERNetDecode
63-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
65+
rec_char_dict_path: *rec_char_dict_path
6466

6567
Metric:
6668
name: LaTeXOCRMetric
@@ -74,12 +76,12 @@ Train:
7476
label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
7577
transforms:
7678
- UniMERNetImgDecode:
77-
input_size: [384, 384]
79+
input_size: *input_size
7880
- UniMERNetTrainTransform:
7981
- LatexImageFormat:
8082
- UniMERNetLabelEncode:
81-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
82-
max_seq_len: 1024
83+
rec_char_dict_path: *rec_char_dict_path
84+
max_seq_len: *max_new_tokens
8385
- KeepKeys:
8486
keep_keys: ['image', 'label', 'attention_mask']
8587

@@ -97,12 +99,12 @@ Eval:
9799
label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
98100
transforms:
99101
- UniMERNetImgDecode:
100-
input_size: [384, 384]
102+
input_size: *input_size
101103
- UniMERNetTestTransform:
102104
- LatexImageFormat:
103105
- UniMERNetLabelEncode:
104-
max_seq_len: 1024
105-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
106+
max_seq_len: *max_new_tokens
107+
rec_char_dict_path: *rec_char_dict_path
106108
- KeepKeys:
107109
keep_keys: ['image', 'label', 'attention_mask', 'filename']
108110
loader:

configs/rec/rec_unimernet.yml

+10-8
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ Global:
1515
infer_img: doc/datasets/pme_demo/0000013.png
1616
infer_mode: False
1717
use_space_char: False
18-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
18+
rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
19+
input_size: &input_size [192, 672]
20+
max_seq_len: &max_seq_len 1024
1921
save_res_path: ./output/rec/predicts_unimernet_plus_config_latexocr.txt
2022
allow_resize_largeImg: False
2123

@@ -59,7 +61,7 @@ Loss:
5961

6062
PostProcess:
6163
name: UniMERNetDecode
62-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
64+
rec_char_dict_path: *rec_char_dict_path
6365

6466
Metric:
6567
name: LaTeXOCRMetric
@@ -73,12 +75,12 @@ Train:
7375
label_file_list: ["./train_data/UniMERNet/train_unimernet_1M.txt"]
7476
transforms:
7577
- UniMERNetImgDecode:
76-
input_size: [192, 672]
78+
input_size: *input_size
7779
- UniMERNetTrainTransform:
7880
- UniMERNetImageFormat:
7981
- UniMERNetLabelEncode:
80-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
81-
max_seq_len: 1024
82+
rec_char_dict_path: *rec_char_dict_path
83+
max_seq_len: *max_seq_len
8284
- KeepKeys:
8385
keep_keys: ['image', 'label', 'attention_mask']
8486
loader:
@@ -95,12 +97,12 @@ Eval:
9597
label_file_list: ["./train_data/UniMERNet/test_unimernet_cpe.txt"]
9698
transforms:
9799
- UniMERNetImgDecode:
98-
input_size: [192, 672]
100+
input_size: *input_size
99101
- UniMERNetTestTransform:
100102
- UniMERNetImageFormat:
101103
- UniMERNetLabelEncode:
102-
max_seq_len: 1024
103-
rec_char_dict_path: ppocr/utils/dict/unimernet_tokenizer
104+
max_seq_len: *max_seq_len
105+
rec_char_dict_path: *rec_char_dict_path
104106
- KeepKeys:
105107
keep_keys: ['image', 'label', 'attention_mask']
106108
loader:

ppocr/losses/rec_ppformulanet_loss.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def __init__(self, vocab_size=50000, parallel_step=1):
2626
self.vocab_size = vocab_size
2727
self.parallel_step = int(parallel_step)
2828
self.pad_token_id = 1
29+
# ignore padding characters during training
2930
self.cross = nn.CrossEntropyLoss(
3031
reduction="mean", ignore_index=self.ignore_index
3132
)
@@ -54,7 +55,7 @@ def __init__(self, vocab_size=50000):
5455
self.ignore_index = -100
5556
self.vocab_size = vocab_size
5657
self.pad_token_id = 1
57-
# 训练时是否忽略 padding
58+
# ignore padding characters during training
5859
self.cross = nn.CrossEntropyLoss(
5960
reduction="mean", ignore_index=self.ignore_index
6061
)

0 commit comments

Comments
 (0)