4
4
from typing import List
5
5
6
6
from fastapi import FastAPI
7
- from fastapi import UploadFile
7
+ from fastapi import UploadFile , Form
8
8
from fastapi .exceptions import HTTPException
9
9
from fastapi .middleware .cors import CORSMiddleware
10
10
@@ -116,7 +116,7 @@ def extract_img_text(attachment: UploadFile):
116
116
117
117
118
118
@app .post ("/ocr" )
119
- def ocr (attachment : UploadFile , sychronous : bool = True ):
119
+ def ocr (attachment : UploadFile , synchronous : bool = Form ( True ) ):
120
120
"""
121
121
TODO: Support multiple attachments
122
122
It could pass a PDF or an image.
@@ -145,7 +145,7 @@ def ocr(attachment: UploadFile, sychronous: bool = True):
145
145
elif type_details .mime_type .startswith ('application/pdf' ):
146
146
# Attempt extracting text using pdfminer.six or else through the image conversion -> OCR pipeline.
147
147
extraction_function = extract_pdf_text_all
148
- if sychronous is True :
148
+ if synchronous is True :
149
149
is_success , content = extraction_function (file_path = output_filename )
150
150
if is_success is True :
151
151
# Add one more step.
@@ -170,15 +170,23 @@ def ocr_result(key: str):
170
170
171
171
172
172
@app.post("/textract-ocr")
def textract_ocr(attachment: UploadFile, synchronous: bool = Form(True)):
    """
    Run text detection on an uploaded image.

    The upload is saved under /media/textract-ocr-files/ and then either
    processed immediately or queued, depending on ``synchronous``.

    Parameters:
        attachment: The uploaded file; its detected MIME type must start
            with "image".
        synchronous: Form field. When True (the default) ``detect_text`` is
            called inline; otherwise the job is handed to
            ``enqueue_extraction``.

    Returns:
        {"content": ...} with the detected text in synchronous mode, or
        {"link": ...} pointing at /ocr-result/<sha256 of the saved path>
        when the job was queued.

    Raises:
        HTTPException(400): the upload is not an image, the filename is
            unusable, or ``detect_text`` reported a failure (its content is
            passed through as the error detail).
    """
    type_details = identify_file_type(attachment.file)
    if not type_details.mime_type.startswith('image'):
        raise HTTPException(status_code=400, detail="Provide an image")

    # The filename comes from the client. Keep only its final path component
    # so that names such as "../../etc/x" cannot escape the upload directory,
    # and reject names that would not identify a regular file.
    safe_name = os.path.basename(attachment.filename or "")
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Provide a valid filename")

    output_filename = f"/media/textract-ocr-files/{safe_name}"
    save_file(attachment.file, output_filename)
    # Rewind the upload stream after it has been consumed by save_file.
    attachment.file.seek(0)

    if synchronous is True:
        is_success, content = detect_text(output_filename)
        if is_success is not True:
            raise HTTPException(400, detail=content)
        return {"content": content}

    # Add it to a queue.
    enqueue_extraction(extraction_function=detect_text, file_path=output_filename)
    # The result key is the SHA-256 of the saved path.
    # NOTE(review): presumably the same key the queue worker stores results
    # under and that /ocr-result/{key} looks up -- confirm.
    path_hash = hashlib.sha256(output_filename.encode('utf-8')).hexdigest()
    base_url = os.environ.get("BASE_URL", "http://localhost:8000")
    return {"link": f"{base_url}/ocr-result/{path_hash}"}
0 commit comments