Skip to content

Instantly share code, notes, and snippets.

@odedlaz
Last active February 4, 2017 22:04
Show Gist options
  • Save odedlaz/22386812cbc4ecfb1331e4af982ad760 to your computer and use it in GitHub Desktop.
Save odedlaz/22386812cbc4ecfb1331e4af982ad760 to your computer and use it in GitHub Desktop.
OCR using tesserocr
from tesserocr import PyTessBaseAPI
import sys
import os
# tesserocr -> https://pypi.python.org/pypi/tesserocr
# cython -> https://pypi.python.org/pypi/Cython
# Pillow -> https://pypi.python.org/pypi/Pillow
# Command-line script body: run OCR on the image whose path is the single
# command-line argument and print each non-blank line of recognized text.
# Exit status is 1 for a wrong argument count and 2 when the path is not
# an existing regular file.
if len(sys.argv) != 2:
    # Diagnostics go to stderr so they never mix with OCR text on stdout.
    sys.stderr.write(
        "you need to pass the path to the image as first argument\n")
    sys.exit(1)

path = sys.argv[1]
# isfile (rather than exists) also rejects directories, which would
# otherwise pass the check and be handed to the OCR engine as an image.
if not os.path.isfile(path):
    sys.stderr.write("image doesn't exist at: " + path + "\n")
    sys.exit(2)

with PyTessBaseAPI() as api:
    # The API is given the absolute form of the user-supplied path.
    api.SetImageFile(os.path.abspath(path))
    text = api.GetUTF8Text()

# Strip each line once and skip those that are empty after stripping.
for raw_line in text.split("\n"):
    line = raw_line.strip()
    if line:
        print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment