Last active
February 4, 2017 22:04
-
-
Save odedlaz/22386812cbc4ecfb1331e4af982ad760 to your computer and use it in GitHub Desktop.
OCR using tesserocr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys

from tesserocr import PyTessBaseAPI
# tesserocr -> https://pypi.python.org/pypi/tesserocr
# cython -> https://pypi.python.org/pypi/Cython
# Pillow -> https://pypi.python.org/pypi/Pillow
def extract_lines(text):
    """Return the non-blank lines of *text*, stripped of surrounding whitespace.

    The text is split on newline characters only; every piece is stripped
    once, and pieces that are empty after stripping are dropped.
    """
    stripped = (piece.strip() for piece in text.split("\n"))
    return [piece for piece in stripped if piece]


def main(argv=None):
    """Run OCR on the image named on the command line and print its text.

    Args:
        argv: Full argument vector (program name first). Defaults to
            ``sys.argv`` when omitted.

    Returns:
        The process exit status: 0 on success, 1 when the argument count is
        wrong, 2 when the given path is not an existing file.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        # Diagnostics go to stderr so they never mix with the OCR output.
        sys.stderr.write(
            "you need to pass the path to the image as first argument\n")
        return 1

    path = argv[1]
    # isfile (not exists): a directory exists but is not an image file.
    if not os.path.isfile(path):
        sys.stderr.write("image doesn't exist at: " + path + "\n")
        return 2

    with PyTessBaseAPI() as api:
        api.SetImageFile(os.path.abspath(path))
        for text_line in extract_lines(api.GetUTF8Text()):
            print(text_line)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment