Skip to content

Commit d865ca9

Browse files
committed
Removed redundant APIs
1 parent 4395eba commit d865ca9

File tree

1 file changed

+1
-56
lines changed

1 file changed

+1
-56
lines changed

main.py

Lines changed: 1 addition & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@
99
from fastapi.exceptions import HTTPException
1010
from fastapi.middleware.cors import CORSMiddleware
1111

12-
from services import identify_file_type, merge_pdfs, save_file, extract_pdf_text_searchable, get_file_size, extract_image_text
12+
from services import identify_file_type, merge_pdfs, save_file
1313
from service_wrappers import extract_image_text_and_set_db, extract_pdf_text_and_set_db
1414
from textract_wrapper import detect_text_and_set_db
15-
from text_analysis import analyze
1615
from tasks import enqueue_extraction
1716
from db import set_object, get_object
1817

@@ -38,18 +37,6 @@ def root():
3837
return "Document Processing"
3938

4039

41-
@app.post("/content-type")
42-
def identify_content_type(attachment: UploadFile):
43-
# Identify the file mime type.
44-
filename = f"/media/content-type-identification/{attachment.filename}"
45-
save_file(attachment.file, filename)
46-
# We read through the file in the previous step, i.e. save_file().
47-
# We must seek(0), and go to the beginning before trying to identify the file type.
48-
attachment.file.seek(0)
49-
file_type = identify_file_type(attachment.file)
50-
return {"content-type": file_type.mime_type}
51-
52-
5340
@app.post("/pdfs-merge")
5441
def pdfs_merge(attachments: List[UploadFile]):
5542
"""
@@ -75,48 +62,6 @@ def pdfs_merge(attachments: List[UploadFile]):
7562
return {"status": "processed", "filename": merged_filename}
7663

7764

78-
@app.post("/extract-pdf-text")
79-
def extract_text(attachment: UploadFile):
80-
"""
81-
Extracts text from an attachment uploaded through multipart/form-data.
82-
"""
83-
type_details = identify_file_type(attachment.file)
84-
if type_details.mime_type != 'application/pdf':
85-
raise HTTPException(status_code=400, detail="A non-pdf file found.")
86-
attachment_name = attachment.filename
87-
output_filename = f"/media/extraction-pdfs/{attachment_name}"
88-
save_file(attachment.file, output_filename)
89-
attachment.file.seek(0)
90-
is_success, content = extract_pdf_text_searchable(attachment.file)
91-
if is_success is False:
92-
raise HTTPException(status_code=400, detail=content)
93-
analysis_result = analyze(content)
94-
return {"content": content, "analysis_result": analysis_result}
95-
96-
97-
@app.post("/extract-image-text")
98-
def extract_img_text(attachment: UploadFile):
99-
"""
100-
Perform OCR on the uploaded attachment.
101-
Currently works with images having text.
102-
Later add support for PDFs and Docx as well.
103-
"""
104-
type_details = identify_file_type(attachment.file)
105-
if not type_details.mime_type.startswith('image'):
106-
raise HTTPException(status_code=400, detail="A non image file found.")
107-
file_size = get_file_size(attachment.file)
108-
# 10 MB
109-
if file_size > (10 * 1024 * 1024):
110-
raise HTTPException(status_code=400, detail="Only supports upto 10MB files.")
111-
output_filename = f"/media/extraction-images/{attachment.filename}"
112-
attachment.file.seek(0)
113-
save_file(attachment.file, output_filename)
114-
is_success, content = extract_image_text(output_filename)
115-
if is_success is False:
116-
raise HTTPException(status_code=400, detail=content)
117-
return {"content": content}
118-
119-
12065
@app.post("/ocr")
12166
def ocr(attachment: UploadFile, gray: bool = Form(True), denoise: bool = Form(True), binarize: bool = Form(True)):
12267
"""

0 commit comments

Comments
 (0)