Skip to content

Commit af87691

Browse files
authored
add ci for paddleocr test (#12062)
* add ci for paddleocr test
* fix flake8 error
* fix paddlepaddle deps
* add dep
* fix
* move flake8 to pre-commit
* update ut
* fix bug
* fix bug set paddlepaddle==2.5
* fix bug
* fix bug
* fix bug
* update test
* remove lscpu
1 parent: 579d0c3; commit: af87691

File tree

14 files changed

+184
-18
lines changed

14 files changed

+184
-18
lines changed

.github/workflows/pre-commit.yml renamed to .github/workflows/codestyle.yml

+6-4
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,18 @@
1-
name: pre-commit
1+
name: PaddleOCR Code Style Check
22

33
on:
44
pull_request:
55
push:
66
branches: ['main', 'release/*']
77

88
jobs:
9-
pre-commit:
9+
check-code-style:
1010
runs-on: ubuntu-latest
1111
steps:
12-
- uses: actions/checkout@v3
13-
- uses: actions/setup-python@v3
12+
- uses: actions/checkout@v4
13+
with:
14+
ref: ${{ github.ref }}
15+
- uses: actions/setup-python@v5
1416
with:
1517
python-version: '3.10'
1618
# Install Dependencies for Python

.github/workflows/tests.yaml

+30
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
name: PaddleOCR PR Tests
2+
3+
on:
4+
push:
5+
pull_request:
6+
branches: ["main", "release/*"]
7+
8+
permissions:
9+
contents: read
10+
11+
jobs:
12+
test-pr:
13+
runs-on: ubuntu-latest
14+
15+
steps:
16+
- uses: actions/checkout@v4
17+
- name: Set up Python 3.10
18+
uses: actions/setup-python@v5
19+
with:
20+
python-version: "3.10"
21+
- name: Install dependencies
22+
run: |
23+
python -m pip install --upgrade pip
24+
pip install pytest
25+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
26+
pip install "paddlepaddle==2.5" requests
27+
pip install -e .
28+
- name: Test with pytest
29+
run: |
30+
pytest tests/

.pre-commit-config.yaml

+13
Original file line number | Diff line number | Diff line change
@@ -35,3 +35,16 @@ repos:
3535
hooks:
3636
- id: black
3737
files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
38+
39+
# Flake8
40+
- repo: https://github.com/pycqa/flake8
41+
rev: 7.0.0
42+
hooks:
43+
- id: flake8
44+
args:
45+
- --count
46+
- --select=E9,F63,F7,F82
47+
- --show-source
48+
- --statistics
49+
exclude: ^benchmark/|^test_tipc/
50+

benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def __call__(self, data: dict):
2525
return data
2626
data["img"] = (
2727
random_noise(data["img"], mode="gaussian", clip=True) * 255
28-
).astype(im.dtype)
28+
).astype(data["img"].dtype)
2929
return data
3030

3131

deploy/hubserving/kie_ser/module.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ def serving_method(self, images, **kwargs):
142142

143143

144144
if __name__ == "__main__":
145-
ocr = OCRSystem()
145+
ocr = KIESer()
146146
ocr._initialize()
147147
image_path = [
148148
"./doc/imgs/11.jpg",

deploy/hubserving/kie_ser_re/module.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ def serving_method(self, images, **kwargs):
144144

145145

146146
if __name__ == "__main__":
147-
ocr = OCRSystem()
147+
ocr = KIESerRE()
148148
ocr._initialize()
149149
image_path = [
150150
"./doc/imgs/11.jpg",

ppocr/data/imaug/label_ops.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -841,11 +841,11 @@ def __call__(self, data):
841841
return data
842842

843843
def xyxyxyxy2xywh(self, boxes):
844-
new_bboxes = np.zeros([len(bboxes), 4])
845-
new_bboxes[:, 0] = bboxes[:, 0::2].min() # x1
846-
new_bboxes[:, 1] = bboxes[:, 1::2].min() # y1
847-
new_bboxes[:, 2] = bboxes[:, 0::2].max() - new_bboxes[:, 0] # w
848-
new_bboxes[:, 3] = bboxes[:, 1::2].max() - new_bboxes[:, 1] # h
844+
new_bboxes = np.zeros([len(boxes), 4])
845+
new_bboxes[:, 0] = boxes[:, 0::2].min() # x1
846+
new_bboxes[:, 1] = boxes[:, 1::2].min() # y1
847+
new_bboxes[:, 2] = boxes[:, 0::2].max() - new_bboxes[:, 0] # w
848+
new_bboxes[:, 3] = boxes[:, 1::2].max() - new_bboxes[:, 1] # h
849849
return new_bboxes
850850

851851
def xyxy2xywh(self, bboxes):

ppocr/losses/distillation_loss.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1184,7 +1184,9 @@ def forward(self, predicts, batch):
11841184
loss = super().forward(out1, out2, ctc_label)
11851185
if isinstance(loss, dict):
11861186
for key in loss:
1187-
loss_dict["{}_{}_{}".format(self.name, model_name, idx)] = loss[key]
1187+
loss_dict[
1188+
"{}_{}_{}".format(self.name, self.model_name_pairs, idx)
1189+
] = loss[key]
11881190
else:
11891191
loss_dict["{}_{}".format(self.name, idx)] = loss
11901192
return loss_dict

ppocr/metrics/vqa_token_re_metric.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import numpy as np
2020
import paddle
2121

22-
__all__ = ["KIEMetric"]
22+
__all__ = ["VQAReTokenMetric"]
2323

2424

2525
class VQAReTokenMetric(object):

ppocr/metrics/vqa_token_ser_metric.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import numpy as np
2020
import paddle
2121

22-
__all__ = ["KIEMetric"]
22+
__all__ = ["VQASerTokenMetric"]
2323

2424

2525
class VQASerTokenMetric(object):

ppocr/modeling/backbones/rec_efficientb3_pren.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@
2727
import paddle.nn as nn
2828
import paddle.nn.functional as F
2929

30-
__all__ = ["EfficientNetb3"]
30+
__all__ = ["EfficientNetb3_PREN"]
3131

3232
GlobalParams = collections.namedtuple(
3333
"GlobalParams",

ppocr/modeling/heads/rec_aster_head.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ def sample(self, x):
132132
# Decoder
133133
state = paddle.zeros([1, batch_size, self.sDim])
134134

135-
predicted_ids, predicted_scores = [], []
135+
predicted_ids, predicted_scores, predicted = [], [], None
136136
for i in range(self.max_len_labels):
137137
if i == 0:
138138
y_prev = paddle.full(shape=[batch_size], fill_value=self.num_classes)

ppocr/utils/loggers/wandb_logger.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
import os
22
from .base_logger import BaseLogger
3+
from ppocr.utils.logging import get_logger
4+
5+
logger = get_logger()
36

47

58
class WandbLogger(BaseLogger):
@@ -11,7 +14,7 @@ def __init__(
1114
entity=None,
1215
save_dir=None,
1316
config=None,
14-
**kwargs
17+
**kwargs,
1518
):
1619
try:
1720
import wandb

tests/test_paddleocr_api.py

+116
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,116 @@
1+
from typing import Any
2+
3+
import pytest
4+
from paddleocr import PaddleOCR, PPStructure
5+
6+
7+
# Test image paths
8+
IMAGE_PATHS_OCR = ["./doc/imgs_en/254.jpg", "./doc/imgs_en/img_10.jpg"]
9+
IMAGE_PATHS_STRUCTURE = [
10+
"./ppstructure/docs/table/layout.jpg",
11+
"./ppstructure/docs/table/1.png",
12+
]
13+
14+
15+
@pytest.fixture(params=["en", "ch"])
16+
def ocr_engine(request: Any) -> PaddleOCR:
17+
"""
18+
Initialize PaddleOCR engine with different languages.
19+
20+
Args:
21+
request: pytest fixture request object.
22+
23+
Returns:
24+
An instance of PaddleOCR.
25+
"""
26+
return PaddleOCR(lang=request.param)
27+
28+
29+
def test_ocr_initialization(ocr_engine: PaddleOCR) -> None:
30+
"""
31+
Test PaddleOCR initialization.
32+
33+
Args:
34+
ocr_engine: An instance of PaddleOCR.
35+
"""
36+
assert ocr_engine is not None
37+
38+
39+
@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
40+
def test_ocr_function(ocr_engine: PaddleOCR, image_path: str) -> None:
41+
"""
42+
Test PaddleOCR OCR functionality with different images.
43+
44+
Args:
45+
ocr_engine: An instance of PaddleOCR.
46+
image_path: Path to the image to be processed.
47+
"""
48+
result = ocr_engine.ocr(image_path)
49+
assert result is not None
50+
assert isinstance(result, list)
51+
52+
53+
@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
54+
def test_ocr_det_only(ocr_engine: PaddleOCR, image_path: str) -> None:
55+
"""
56+
Test PaddleOCR OCR functionality with detection only.
57+
58+
Args:
59+
ocr_engine: An instance of PaddleOCR.
60+
image_path: Path to the image to be processed.
61+
"""
62+
result = ocr_engine.ocr(image_path, det=True, rec=False)
63+
assert result is not None
64+
assert isinstance(result, list)
65+
66+
67+
@pytest.mark.parametrize("image_path", IMAGE_PATHS_OCR)
68+
def test_ocr_rec_only(ocr_engine: PaddleOCR, image_path: str) -> None:
69+
"""
70+
Test PaddleOCR OCR functionality with recognition only.
71+
72+
Args:
73+
ocr_engine: An instance of PaddleOCR.
74+
image_path: Path to the image to be processed.
75+
"""
76+
result = ocr_engine.ocr(image_path, det=False, rec=True)
77+
assert result is not None
78+
assert isinstance(result, list)
79+
80+
81+
@pytest.fixture(params=["en", "ch"])
82+
def structure_engine(request: Any) -> PPStructure:
83+
"""
84+
Initialize PPStructure engine with different languages.
85+
86+
Args:
87+
request: pytest fixture request object.
88+
89+
Returns:
90+
An instance of PPStructure.
91+
"""
92+
return PPStructure(lang=request.param)
93+
94+
95+
def test_structure_initialization(structure_engine: PPStructure) -> None:
96+
"""
97+
Test PPStructure initialization.
98+
99+
Args:
100+
structure_engine: An instance of PPStructure.
101+
"""
102+
assert structure_engine is not None
103+
104+
105+
@pytest.mark.parametrize("image_path", IMAGE_PATHS_STRUCTURE)
106+
def test_structure_function(structure_engine: PPStructure, image_path: str) -> None:
107+
"""
108+
Test PPStructure structure analysis functionality with different images.
109+
110+
Args:
111+
structure_engine: An instance of PPStructure.
112+
image_path: Path to the image to be processed.
113+
"""
114+
result = structure_engine(image_path)
115+
assert result is not None
116+
assert isinstance(result, list)

0 commit comments

Comments
 (0)