Skip to content

Commit 7afdb90

Browse files
committed
Refactor
1 parent c68c6a0 commit 7afdb90

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

main.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,15 +143,13 @@ def ocr(attachment: UploadFile):
143143
path_hash = hashlib.sha256(output_filename.encode('utf-8')).hexdigest()
144144
# Check the content type; if it is an image, extract the text using Tesseract.
145145
if type_details.mime_type.startswith('image'):
146-
extraction_function = extract_image_text_and_set_db
147-
# processed_file_path = preprocess_image_opencv(output_filename)
146+
# Attempt extraction through Tesseract
148147
set_object(key=path_hash, field="type", value="image")
149-
enqueue_extraction(extraction_function=extract_image_text_and_set_db, file_path=output_filename, key=path_hash, field="content")
148+
enqueue_extraction(extraction_function=extract_image_text_and_set_db, file_path=output_filename, key=path_hash)
150149
elif type_details.mime_type.startswith('application/pdf'):
151150
# Attempt to extract text using pdfminer.six, or else through the image conversion -> OCR pipeline.
152-
extraction_function = extract_pdf_text_and_set_db
153151
set_object(key=path_hash, field="type", value="pdf")
154-
enqueue_extraction(extraction_function=extraction_function, file_path=output_filename, key=path_hash, field="content")
152+
enqueue_extraction(extraction_function=extract_pdf_text_and_set_db, file_path=output_filename, key=path_hash)
155153
# Add it to a queue.
156154
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
157155
link = f"{BASE_URL}/ocr-result/{path_hash}"

0 commit comments

Comments
 (0)