Skip to content

Instantly share code, notes, and snippets.

@tvdsluijs
Created July 2, 2020 12:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tvdsluijs/25a8cb7e5c291650c0fefe4bdac6cea9 to your computer and use it in GitHub Desktop.
Save tvdsluijs/25a8cb7e5c291650c0fefe4bdac6cea9 to your computer and use it in GitHub Desktop.
Get text from images with Python and pytesseract
import sys
import pytesseract
from pathlib import Path
from glob import glob
from os.path import join
class ImageOcr:
    """Run OCR over the images in a directory and collect the text in a file.

    The output file (``ocr.txt``, relative to the current working directory)
    is created or emptied when the object is constructed. Each call to
    ``process_images`` appends the text recognised in every matching image.
    """

    def __init__(self, my_path: Path = None):
        """Store the image directory and start with an empty output file.

        Args:
            my_path: Directory containing the images to process. When it is
                omitted an error message is printed; the instance is still
                created so existing callers keep working.
        """
        if my_path is None:
            print('Error no Path')

        self.my_path = my_path
        self.items = []
        self.filename = "ocr.txt"
        self.create_file()

    def process_images(self):
        """Append the OCR text of each GIF, PNG and JPG image in ``my_path``.

        Only files directly inside ``my_path`` that match ``*.gif``, ``*.png``
        or ``*.jpg`` are considered. Each file path is handed to
        ``pytesseract.image_to_string`` and the result is appended to the
        output file.
        """
        files = []
        for ext in ('*.gif', '*.png', '*.jpg'):
            files.extend(glob(join(self.my_path, ext)))

        for file in files:
            self.write_to_file(pytesseract.image_to_string(file))

    def create_file(self):
        """Create the output file, truncating it if it already exists."""
        # Opening in "w" mode is enough to create/empty the file; the context
        # manager makes sure the handle is closed instead of leaking it.
        with open(self.filename, "w", encoding="utf-8"):
            pass

    def write_to_file(self, my_str: str = ""):
        """Append ``my_str`` to the output file.

        Args:
            my_str: Text to append; nothing is written for an empty string.
        """
        # Use self.filename (not a literal) so this always targets the same
        # file that create_file() prepared. UTF-8 is explicit because OCR
        # output may contain characters the platform default cannot encode.
        with open(self.filename, "a", encoding="utf-8") as file:
            file.write(my_str)
if __name__ == "__main__":
    # Script entry point: expects the image directory as the first argument.
    # Exit with a readable message instead of an IndexError traceback when
    # the argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: python img_ocr.py <path_to_my_images>")

    my_path = Path(sys.argv[1])
    # A path that is not a directory would match no images and silently
    # produce an empty output file, so report it explicitly.
    if not my_path.is_dir():
        sys.exit(f"Error: {my_path} is not a directory")

    iocr = ImageOcr(my_path=my_path)
    iocr.process_images()
# Usage example: python img_ocr.py c:\path_to_my_images
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment