From 10c66ac1c8f9c97172493944f5ced3e910f382e8 Mon Sep 17 00:00:00 2001 From: BUJIQI <145289312+BUJIQI@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:47:27 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E5=8D=B0=E7=AB=A0=E5=BC=AF=E6=9B=B2?= =?UTF-8?q?=E6=96=87=E5=AD=97=E8=AF=86=E5=88=AB.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...07\345\255\227\350\257\206\345\210\253.md" | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git "a/docs/applications/\345\215\260\347\253\240\345\274\257\346\233\262\346\226\207\345\255\227\350\257\206\345\210\253.md" "b/docs/applications/\345\215\260\347\253\240\345\274\257\346\233\262\346\226\207\345\255\227\350\257\206\345\210\253.md" index be9b2f6beaf..0b60efd15c2 100644 --- "a/docs/applications/\345\215\260\347\253\240\345\274\257\346\233\262\346\226\207\345\255\227\350\257\206\345\210\253.md" +++ "b/docs/applications/\345\215\260\347\253\240\345\274\257\346\233\262\346\226\207\345\255\227\350\257\206\345\210\253.md" @@ -995,6 +995,62 @@ def run(data_dir, label_file, save_dir): ``` +若训练数据使用**4点标注以外的多点标注**(如8点,16点等不规则形状标注),可以使用以下代码进行印章文字区域剪切 + +
import cv2
import numpy as np
import os
import json


def get_polygon_crop_image(img, points):
    """Cut a polygon region out of an image, blacking out everything outside it.

    Args:
        img: Image array, e.g. as returned by ``cv2.imread``.
        points: Polygon vertices as a sequence of ``[x, y]`` pairs. Any number
            of vertices is accepted (8-point, 16-point, ... annotations).

    Returns:
        The crop of the polygon's bounding rectangle, with every pixel outside
        the polygon set to 0. The rectangle is clipped to the image bounds, so
        the result is empty when the polygon lies entirely outside the image.
    """
    points = np.array(points, dtype=np.int32)
    mask = np.zeros_like(img, dtype=np.uint8)
    cv2.fillPoly(mask, [points], (255, 255, 255))
    result = cv2.bitwise_and(img, mask)

    x, y, w, h = cv2.boundingRect(points)
    # Clip to the image: a negative slice start would wrap around in NumPy
    # and silently return the wrong region.
    img_h, img_w = img.shape[:2]
    x0 = max(x, 0)
    y0 = max(y, 0)
    x1 = max(x0, min(x + w, img_w))
    y1 = max(y0, min(y + h, img_h))
    return result[y0:y1, x0:x1]


def run(data_dir, label_file, save_dir, output_txt):
    """Crop every annotated text region and write a recognition label file.

    Each non-empty line of ``label_file`` is expected to hold an image path and
    a JSON list separated by a tab; every JSON item provides a ``points``
    polygon and a ``transcription`` string.

    Args:
        data_dir: Directory the image paths in ``label_file`` are joined to.
        label_file: Path of the detection annotation file.
        save_dir: Directory the cropped images are written to (created if
            missing).
        output_txt: Path of the label file to write; one
            ``<crop path><TAB><transcription>`` line per saved crop.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when output_txt actually has one.
    out_dir = os.path.dirname(output_txt)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    os.makedirs(save_dir, exist_ok=True)

    # Label entries point into the folder the crops were really written to
    # ("crop_img" for the default save_dir).
    crop_prefix = os.path.basename(os.path.normpath(save_dir))

    # Explicit UTF-8: transcriptions are typically non-ASCII and the platform
    # default encoding is not reliable.
    with open(label_file, 'r', encoding='utf-8') as label_fp, \
            open(output_txt, 'w', encoding='utf-8') as txt_file:
        for line in label_fp:
            line = line.strip()
            if not line:
                continue
            filename, label = line.split('\t', 1)
            img_path = os.path.join(data_dir, filename)
            label = json.loads(label)
            src_im = cv2.imread(img_path)
            if src_im is None:
                # Unreadable or missing image: skip its annotations.
                continue

            # Keep the full stem so "a.1.jpg" and "a.2.jpg" do not collide.
            stem = os.path.splitext(
                os.path.basename(filename.replace('\\', '/')))[0]
            for i, anno in enumerate(label):
                crop_im = get_polygon_crop_image(src_im, anno['points'])
                if crop_im.size == 0:
                    # Polygon fell outside the image; nothing to save.
                    continue
                crop_img_name = f'{stem}_crop_{i}.jpg'
                save_path = os.path.join(save_dir, crop_img_name)
                # Only list crops that were actually written to disk.
                if not cv2.imwrite(save_path, crop_im):
                    continue
                txt_file.write(
                    f'{crop_prefix}/{crop_img_name}\t{anno["transcription"]}\n')


if __name__ == "__main__":
    data_dir = "seal"  # directory of the image dataset
    label_file = "Label.txt"  # path of the annotation result txt
    save_dir = "crop_img"  # output folder for the cropped images used to train recognition
    output_txt = "rec_gt.txt"  # path of the recognition training label txt
    run(data_dir, label_file, save_dir, output_txt)
+ + 数据处理完成后,即可配置训练的配置文件。SVTR配置文件选择[configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml) 修改SVTR配置文件中的训练数据部分如下: