Skip to content

Commit a21d508

Browse files
committed
Added the ability to answer who, when, where, what, etc. questions about the given text.
1 parent 433362a commit a21d508

File tree

4 files changed

+169
-7
lines changed

4 files changed

+169
-7
lines changed

html/converse.html

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8" />
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6+
<title>NLP Question Answering</title>
7+
<script src="https://cdn.tailwindcss.com"></script>
8+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap" rel="stylesheet" />
9+
<style>
10+
body {
11+
font-family: 'Inter', sans-serif;
12+
}
13+
</style>
14+
</head>
15+
<body class="bg-gradient-to-br from-blue-100 via-purple-100 to-pink-100 min-h-screen flex flex-col">
16+
<!-- Header -->
17+
<header class="bg-white shadow-md sticky top-0 z-10">
18+
<div class="max-w-7xl mx-auto px-6 py-4 flex justify-between items-center">
19+
<h1 class="text-2xl font-bold text-purple-600">Playing with Natural Language Processing</h1>
20+
</div>
21+
</header>
22+
23+
<!-- Main Section -->
24+
<main class="flex-grow px-4 py-16">
25+
<!-- Hero -->
26+
<section class="text-center mb-12">
27+
<h2 class="text-4xl md:text-5xl font-extrabold text-gray-800 mb-4">
28+
Question Answering
29+
</h2>
30+
<p class="text-lg text-gray-600 max-w-2xl mx-auto">
31+
Get your answers for Who, What, When, Where, How much etc.
32+
</p>
33+
</section>
34+
35+
<!-- Form -->
36+
<section class="max-w-xl mx-auto bg-white p-8 rounded-xl shadow-lg mb-8">
37+
<form id="qaForm" class="space-y-6">
38+
<div>
39+
<label class="block text-gray-700 font-medium mb-1" for="textInput">Text</label>
40+
<input type="text" id="textInput" name="text" placeholder="Enter your sentence"
41+
class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500" required />
42+
</div>
43+
<div>
44+
<label class="block text-gray-700 font-medium mb-1" for="questionInput">Question</label>
45+
<input type="text" id="questionInput" name="question" placeholder="Enter your question"
46+
class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-purple-500" required />
47+
</div>
48+
<button type="submit"
49+
class="w-full bg-purple-600 hover:bg-purple-700 text-white font-semibold py-3 rounded-lg transition">
50+
Submit
51+
</button>
52+
</form>
53+
</section>
54+
55+
<!-- Answer Section -->
56+
<section id="answerSection" class="max-w-xl mx-auto hidden">
57+
<div class="bg-white p-6 rounded-lg shadow-lg border-l-8 border-purple-500">
58+
<h4 class="text-lg font-semibold text-purple-700 mb-2">Answer:</h4>
59+
<p id="answerText" class="text-gray-800 text-xl font-medium"></p>
60+
</div>
61+
</section>
62+
63+
<!-- Examples Section -->
64+
<section class="max-w-3xl mx-auto mt-20">
65+
<h3 class="text-2xl font-bold text-gray-800 mb-6">Examples</h3>
66+
67+
<div class="bg-white p-6 rounded-lg shadow mb-6">
68+
<p class="mb-2"><strong class="text-purple-600">Text:</strong> Apaar went to school.</p>
69+
<p class="mb-1"><strong class="text-gray-700">Question:</strong> Who went to school?</p>
70+
</div>
71+
72+
<div class="bg-white p-6 rounded-lg shadow mb-6">
73+
<p class="mb-2"><strong class="text-purple-600">Text:</strong> The book was read by Akshar.</p>
74+
<p><strong class="text-gray-700">Other question:</strong> What was read?</p>
75+
</div>
76+
77+
<div class="bg-white p-6 rounded-lg shadow mb-6">
78+
<p class="mb-2"><strong class="text-purple-600">Text:</strong> My birthday is on 22nd January.</p>
79+
<p class="mb-1"><strong class="text-gray-700">Question:</strong> When is my birthday?</p>
80+
</div>
81+
82+
<div class="bg-white p-6 rounded-lg shadow mb-6">
83+
<p class="mb-2"><strong class="text-purple-600">Text:</strong> We visited Colombo for a vacation.</p>
84+
<p class="mb-1"><strong class="text-gray-700">Question:</strong> Where did you go?</p>
85+
</div>
86+
87+
<div class="bg-white p-6 rounded-lg shadow mb-6">
88+
<p class="mb-2"><strong class="text-purple-600">Text:</strong> The eggs cost Rs 20.</p>
89+
<p class="mb-1"><strong class="text-gray-700">Question:</strong> How much did it cost?</p>
90+
</div>
91+
</section>
92+
</main>
93+
94+
<!-- Footer -->
95+
<footer class="bg-white text-center text-gray-500 text-sm py-6 shadow-inner mt-12">
96+
&copy; Akshar Raaj
97+
</footer>
98+
99+
<!-- JS -->
100+
<script>
101+
const form = document.getElementById('qaForm');
102+
const answerSection = document.getElementById('answerSection');
103+
const answerText = document.getElementById('answerText');
104+
105+
form.addEventListener('submit', async (e) => {
106+
e.preventDefault();
107+
108+
const text = document.getElementById('textInput').value.trim();
109+
const question = document.getElementById('questionInput').value.trim();
110+
111+
try {
112+
const response = await fetch('http://localhost:8000/converse', {
113+
method: 'POST',
114+
headers: {
115+
'Content-Type': 'application/json'
116+
},
117+
body: JSON.stringify({ text, question })
118+
});
119+
120+
if (!response.ok) {
121+
throw new Error('Failed to fetch answer.');
122+
}
123+
124+
const data = await response.json();
125+
answerText.textContent = data.answer || 'No answer returned.';
126+
answerSection.classList.remove('hidden');
127+
} catch (error) {
128+
answerText.textContent = 'An error occurred. Please try again.';
129+
answerSection.classList.remove('hidden');
130+
}
131+
});
132+
</script>
133+
</body>
134+
</html>

language_processing.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ def converse(text: str, question: str):
9494
doc = nlp(text)
9595
lowered_question = question.lower()
9696
for token in doc:
97-
logger.info(f"Token: {token.text}, POS: {token.pos_}")
97+
logger.info(f"Token: {token.text}, POS: {token.pos_}, Dep: {token.dep_}")
9898
if token.pos_ == "PROPN":
9999
proper_nouns.append(token)
100100
if token.pos_ == "VERB":
101101
verbs.append(token)
102102
if token.dep_ == "nsubj":
103103
subjects.append(token)
104-
if token.dep_ == "pobj":
104+
if token.dep_ in ["pobj", "dobj"]:
105105
objects.append(token)
106106
if token.pos_ == 'ADP':
107107
prepositions.append(token)
@@ -114,6 +114,7 @@ def converse(text: str, question: str):
114114
logger.info(f"Nouns: {proper_nouns}")
115115
logger.info(f"Verbs: {verbs}")
116116
logger.info(f"Subjects: {subjects}")
117+
logger.info(f"Objects: {objects}")
117118
logger.info(f"Prepositions: {prepositions}")
118119
if "who" in lowered_question:
119120
# The answer should probably be a proper noun.
@@ -123,26 +124,30 @@ def converse(text: str, question: str):
123124
# Hence dependency parsing can help us get that.
124125
# We are currently dealing with single sentences.
125126
# TODO: Modify it to get more context from the question, and then infer the correct subject
126-
return subjects[0]
127+
return subjects[0].text
127128
if "where" in lowered_question:
128129
# It means we want a place as answer
129130
# The answer should probably be a noun
130131
# Very likely it is followed by a prepositional phrase.
131132
# Examples: They went "to" Colombo, kept "on" the table, etc.
132133
if len(objects) > 0:
133-
return objects[0]
134+
return objects[0].text
134135
# Statements like "apaar went to play"
135136
# Here play is not an object. So use the token appearing right after preposition
136137
if len(prepositions) > 0:
137138
prep = prepositions[0]
138-
return doc[prep.i + 1]
139+
token = doc[prep.i + 1]
140+
return token.text
139141
if "how much" in lowered_question:
140142
# A quantity has to be returned
141143
# A quantity would mean a numeric
142144
if len(numerics) > 0:
143-
return numerics[0]
145+
return numerics[0].text
144146
if "when" in lowered_question:
145147
# A date has to be returned
146148
if len(dates) > 0:
147-
return dates[0]
149+
return dates[0].text
150+
if "what" in lowered_question:
151+
if len(objects) > 0:
152+
return objects[0].text
148153
return None

main.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
from fastapi.exceptions import HTTPException
1010
from fastapi.middleware.cors import CORSMiddleware
1111

12+
from models import ConverseModel
1213
from services import identify_file_type, merge_pdfs, save_file
1314
from service_wrappers import extract_image_text_and_set_db, extract_pdf_text_and_set_db
1415
from textract_wrapper import detect_text_and_set_db
16+
from language_processing import converse
1517
from tasks import enqueue_extraction
1618
from db import set_object, get_object
1719

@@ -148,3 +150,18 @@ def textract_ocr(attachment: UploadFile):
148150
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
149151
link = f"{BASE_URL}/ocr-result/{path_hash}"
150152
return {"link": link}
153+
154+
155+
@app.post("/converse")
156+
def conversation(body: ConverseModel):
157+
"""
158+
Performs things like:
159+
- Tokenization
160+
- Parts of Speech tagging
161+
- Named Entity Recognition
162+
"""
163+
answer = converse(body.text, body.question)
164+
logger.info(f"Answer: {answer}")
165+
if answer is None:
166+
answer = "Failed to parse"
167+
return {"answer": answer}

models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from pydantic import BaseModel
2+
3+
4+
class ConverseModel(BaseModel):
5+
text: str
6+
question: str

0 commit comments

Comments
 (0)