Skip to content

Instantly share code, notes, and snippets.

@SouravJohar
Created November 4, 2020 12:23
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save SouravJohar/34b46264bf1cbf7a65ed9ba3e4dd0009 to your computer and use it in GitHub Desktop.
Save SouravJohar/34b46264bf1cbf7a65ed9ba3e4dd0009 to your computer and use it in GitHub Desktop.
import pytesseract
import os
import sys
def read_image(img_path, lang='eng'):
"""
Performs OCR on a single image
:img_path: str, path to the image file
:lang: str, language to be used while conversion (optional, default is english)
Returns
:text: str, converted text from image
"""
try:
return pytesseract.image_to_string(img_path, lang=lang)
except:
return "[ERROR] Unable to process file: {0}".format(img_path)
def read_images_from_dir(dir_path, lang='eng', write_to_file=False):
"""
Performs OCR on all images present in a directory
:dir_path: str, path to the directory of images
:lang: str, language to be used while conversion (optional, default is english)
Returns
:converted_text: dict, mapping of filename to converted text for each image
"""
converted_text = {}
for file_ in os.listdir(dir_path):
if file_.endswith(('png', 'jpeg', 'jpg')):
text = read_image(os.path.join(dir_path, file_), lang=lang)
converted_text[os.path.join(dir_path, file_)] = text
if write_to_file:
for file_path, text in converted_text.items():
_write_to_file(text, os.path.splitext(file_path)[0] + ".txt")
return converted_text
def _write_to_file(text, file_path):
"""
Helper method to write text to a file
"""
print("[INFO] Writing text to file: {0}".format(file_path))
with open(file_path, 'w') as fp:
fp.write(text)
if __name__ == '__main__':
if len(sys.argv) == 1:
print("python3 ocr.py <path>")
print("Provide the path to an image or the path to a directory containing images")
exit(1)
if os.path.isdir(sys.argv[1]):
converted_text_map = read_images_from_dir(sys.argv[1], write_to_file=True)
elif os.path.exists(sys.argv[1]):
print(read_image(sys.argv[1]))
else:
print("Unable to process this file. Please check if it exists and is readable.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment