Last active
January 7, 2024 10:03
-
-
Save amirshnll/fd4898899babf52d5418bf4ccd9a22fe to your computer and use it in GitHub Desktop.
Get person names from passports with Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image | |
import pytesseract | |
import re | |
def extract_name_from_id_card(image):
    """Extract a person's name from a passport / ID-card image using OCR.

    The image is run through Tesseract, the recognized text is lower-cased
    and stripped of space characters, and the result is searched for a
    name field shaped like ``p<FIRST<<SECOND<``.

    Args:
        image: Whatever ``PIL.Image.open`` accepts; the value is passed
            through unchanged.

    Returns:
        * A 2-tuple ``(second_group, first_group)`` -- the two captured
          groups in reversed order -- when the pattern occurs exactly once.
        * Otherwise a list of at most two fallback candidates: the first
          whitespace-separated tokens that are purely alphabetic and longer
          than three characters (the list may be empty).
        * ``None`` if anything raises while opening, recognizing or parsing
          the image.
    """
    try:
        # The context manager releases the image's file handle as soon as
        # OCR is done; the original left it open until garbage collection.
        with Image.open(image) as img:
            ocr_text = pytesseract.image_to_string(img)

        # Spaces are removed so that OCR gaps inside the name line do not
        # break the pattern; newlines are kept and act as token separators
        # for the fallback below.
        ocr_text = ocr_text.lower().replace(" ", "")

        # NOTE(review): in a standard passport MRZ the three-letter
        # issuing-state code directly follows "P<", so the first group
        # presumably contains that code followed by the surname -- confirm
        # against real samples before relying on it as a bare surname.
        name_pattern = re.compile(r"p<([^<]+)<<([^<]+)<")
        matches = name_pattern.findall(ocr_text)

        # Only an unambiguous single hit is trusted.
        if len(matches) == 1:
            return matches[0][::-1]

        # Fallback: take the first two plausible alphabetic tokens.
        possible_names = [
            word for word in ocr_text.split()
            if len(word) > 3 and word.isalpha()
        ]
        return possible_names[:2]
    except Exception:
        # Deliberate best-effort contract: any failure yields None.
        return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment