Skip to content

Commit e17ac52

Browse files
committed
Extract Name, Surname and Date of Birth from passports.
1 parent 4c69a4c commit e17ac52

File tree

1 file changed

+47
-1
lines changed

1 file changed

+47
-1
lines changed

text_analysis.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,60 @@ def classify_pan(text: str):
197197
def _value_after_label(text: str, label: str, max_distance: int):
    """Return the line that follows the first fuzzy match of *label* in *text*.

    ``fuzzy_substring_match`` is expected to return a
    ``(match_found, match_str, distance)`` triple, which is how the passport
    analyzer has always unpacked it.

    Args:
        text: Newline-joined document text with blank lines already removed.
        label: Field caption to look for (e.g. ``"surname"``).
        max_distance: Tolerance forwarded to ``fuzzy_substring_match``.

    Returns:
        The content of the line directly below the matched caption, or
        ``None`` when the caption is not found or nothing follows it.
    """
    match_found, match_str, _distance = fuzzy_substring_match(
        text, label, max_distance=max_distance
    )
    if not match_found:
        return None

    index = text.find(match_str)
    if index == -1:
        # The matcher reported something that is not a literal substring of
        # the text; there is no position to anchor on.
        return None

    new_line_index = text.find('\n', index)
    if new_line_index == -1:
        # The caption sits on the last line, so no value line follows it.
        # Without this guard the slice below would start at offset 0 and
        # return the first line of the whole document as the field value.
        return None

    value, _, _ = text[new_line_index + 1:].partition('\n')
    return value


def analyze_passport(text: str):
    """Extract the passport number, names and date of birth from passport text.

    The passport number is taken from the raw text. The remaining fields are
    looked up by fuzzy-matching their captions and taking the line directly
    below each caption, after blank lines have been removed.

    Args:
        text: Text of the passport page (presumably OCR output - confirm
            against the caller).

    Returns:
        A dict containing only the fields that were found, under the keys
        ``'Passport Number'``, ``'First Name'``, ``'Last Name'`` and
        ``'Date Of Birth'``.
    """
    logger.info("Analyzing passport")

    # Word boundary on both sides.
    # An upper case letter followed by exactly 7 digits
    number_match = re.search(r'\b[A-Z]\d{7}\b', text)
    passport_number = number_match.group(0) if number_match else None

    # Drop blank lines so that the line after a caption is its value.
    non_blank_lines = [line for line in text.splitlines() if line.strip() != '']
    # NOTE(review): lowercasing lets the captions match case-insensitively,
    # but it also means the extracted names and date are returned in lower
    # case. Kept as is because callers may rely on it.
    normalized_text = '\n'.join(non_blank_lines).lower()

    # (output key, caption to search for, fuzzy-match tolerance)
    labelled_fields = (
        ('First Name', "given name(s)", 3),
        ('Last Name', "surname", 2),
        ('Date Of Birth', "date of birth", 2),
    )

    data = {}
    if passport_number is not None:
        data['Passport Number'] = passport_number

    for key, label, max_distance in labelled_fields:
        # Best effort per field: a failure on one caption must not prevent
        # the passport number or the other fields from being returned.
        try:
            value = _value_after_label(normalized_text, label, max_distance)
        except Exception:
            logger.exception("Could not extract '%s' from passport text", label)
            continue
        if value is not None:
            data[key] = value

    return data
209255

210256

0 commit comments

Comments
 (0)