@@ -197,14 +197,60 @@ def classify_pan(text: str):
197
197
def analyze_passport(text: str) -> dict:
    """Extract passport fields from the plain text of a passport page.

    The passport number is found with a regular expression on the raw text.
    The given name, surname and date of birth are located by fuzzy-matching
    their printed labels and taking the line that follows each label.

    Args:
        text: Text of the passport page, one printed line per text line.

    Returns:
        A dict containing any of the keys ``'Passport Number'``,
        ``'First Name'``, ``'Last Name'`` and ``'Date Of Birth'``. A key is
        present only when its value was found. Name and date values are taken
        from a lower-cased copy of the text, so they are returned lower-cased.
    """
    logger.info("Analyzing passport")

    # (label as printed on the document, allowed edit distance, output key)
    field_specs = (
        ("given name(s)", 3, 'First Name'),
        ("surname", 2, 'Last Name'),
        ("date of birth", 2, 'Date Of Birth'),
    )

    data = {}

    # Word boundary on both sides.
    # An upper case letter followed by exactly 7 digits.
    matches = re.findall(r'\b[A-Z]\d{7}\b', text)
    if matches:
        data['Passport Number'] = matches[0]

    # Drop blank lines so that "the line after the label" is the value line,
    # and lower-case everything so labels can be compared case-insensitively.
    normalized = '\n'.join(
        line for line in text.splitlines() if line.strip()
    ).lower()

    for label, max_distance, key in field_specs:
        # Each field is extracted independently: a failure in one must not
        # discard the passport number or the fields already found.
        try:
            value = _line_after_label(normalized, label, max_distance)
        except Exception:
            logger.exception("Failed to extract %s from passport text", key)
            continue
        if value is not None:
            data[key] = value

    return data


def _line_after_label(text: str, label: str, max_distance: int):
    """Return the line following the fuzzy match of *label*, or ``None``.

    ``None`` is returned when the label is not matched, or when the matched
    label sits on the last line so that no value line follows it.
    """
    match_found, match_str, _distance = fuzzy_substring_match(
        text, label, max_distance=max_distance
    )
    if not match_found:
        return None

    # NOTE(review): assumes match_str is a literal substring of text;
    # str.index raises ValueError otherwise, which the caller logs.
    label_start = text.index(match_str)
    line_end = text.find('\n', label_start)
    if line_end == -1:
        # Label is on the final line. Without this guard the slice below
        # would start at index 0 and yield the first line of the document.
        return None
    return text[line_end + 1:].split('\n', 1)[0]
209
255
210
256
0 commit comments