@@ -27,14 +27,19 @@ def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
27
27
metrics = self .parameter ["metrics" ]
28
28
textequiv_level = self .parameter ["textequiv_level" ]
29
29
30
- try :
31
- gt_file , ocr_file = input_files
32
- assert gt_file , 'missing GT file'
33
- assert ocr_file , 'missing OCR file'
34
- assert gt_file .local_filename
35
- assert ocr_file .local_filename
36
- except (ValueError , AssertionError ) as err :
37
- self .logger .warning (f'Missing either GT file, OCR file or both: { err } ' ) # TODO how to log which page?
30
+ # wrong number of inputs: let fail
31
+ gt_file , ocr_file = input_files
32
+ # missing on either side: skip (zip_input_files already warned)
33
+ if not gt_file or not ocr_file :
34
+ return
35
+ # missing download (i.e. OCRD_DOWNLOAD_INPUT=false):
36
+ if not gt_file .local_filename :
37
+ if config .OCRD_MISSING_INPUT == 'ABORT' :
38
+ raise MissingInputFile (gt_file .fileGrp , gt_file .pageId , gt_file .mimetype )
39
+ return
40
+ if not ocr_file .local_filename :
41
+ if config .OCRD_MISSING_INPUT == 'ABORT' :
42
+ raise MissingInputFile (ocr_file .fileGrp , ocr_file .pageId , ocr_file .mimetype )
38
43
return
39
44
40
45
page_id = gt_file .pageId
0 commit comments