Skip to content

Commit dcc4ceb

Browse files
committed
Added async support for Amazon Textract detection.
1 parent a7e1910 commit dcc4ceb

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

main.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from typing import List
55

66
from fastapi import FastAPI
7-
from fastapi import UploadFile
7+
from fastapi import UploadFile, Form
88
from fastapi.exceptions import HTTPException
99
from fastapi.middleware.cors import CORSMiddleware
1010

@@ -116,7 +116,7 @@ def extract_img_text(attachment: UploadFile):
116116

117117

118118
@app.post("/ocr")
119-
def ocr(attachment: UploadFile, sychronous: bool = True):
119+
def ocr(attachment: UploadFile, synchronous: bool = Form(True)):
120120
"""
121121
TODO: Support multiple attachments
122122
It could pass a PDF or an image.
@@ -145,7 +145,7 @@ def ocr(attachment: UploadFile, sychronous: bool = True):
145145
elif type_details.mime_type.startswith('application/pdf'):
146146
# Attempt extracting text using pdfminer.six or else through the image conversion -> OCR pipeline.
147147
extraction_function = extract_pdf_text_all
148-
if sychronous is True:
148+
if synchronous is True:
149149
is_success, content = extraction_function(file_path=output_filename)
150150
if is_success is True:
151151
# Add one more step.
@@ -170,15 +170,23 @@ def ocr_result(key: str):
170170

171171

172172
@app.post("/textract-ocr")
173-
def textract_ocr(attachment: UploadFile):
173+
def textract_ocr(attachment: UploadFile, synchronous: bool = Form(True)):
174174
type_details = identify_file_type(attachment.file)
175175
if not type_details.mime_type.startswith('image'):
176176
raise HTTPException(status_code=400, detail="Provide an image")
177177
output_filename = f"/media/textract-ocr-files/{attachment.filename}"
178178
save_file(attachment.file, output_filename)
179179
attachment.file.seek(0)
180-
is_success, content = detect_text(output_filename)
181-
if is_success is True:
182-
return {"content": content}
180+
if synchronous is True:
181+
is_success, content = detect_text(output_filename)
182+
if is_success is True:
183+
return {"content": content}
184+
else:
185+
raise HTTPException(400, detail=content)
183186
else:
184-
raise HTTPException(400, detail=content)
187+
# Add it to a queue.
188+
enqueue_extraction(extraction_function=detect_text, file_path=output_filename)
189+
path_hash = hashlib.sha256(output_filename.encode('utf-8')).hexdigest()
190+
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
191+
link = f"{BASE_URL}/ocr-result/{path_hash}"
192+
return {"link": link}

0 commit comments

Comments
 (0)