Skip to content

Instantly share code, notes, and snippets.

@CodyKochmann
Created February 17, 2023 13:27
Show Gist options
  • Save CodyKochmann/1d77ecefa9e6d486165030402f88c39a to your computer and use it in GitHub Desktop.
Save CodyKochmann/1d77ecefa9e6d486165030402f88c39a to your computer and use it in GitHub Desktop.
Python OCR conversion of an image from a URL
# by: Cody Kochmann
# this script downloads the image at the url stored in
# $TARGET_IMAGE and runs pytesseract against it to output
# the text contents of the image to stdout
import os, sys, functools
import requests
try:
from PIL import Image
except ImportError:
import Image
import pytesseract
# print-alike that writes to stderr, so progress messages never mix with the
# OCR text that goes to stdout
eprint = functools.partial(print, file=sys.stderr)

# Point pytesseract at the tesseract executable explicitly. Adjust this path
# if tesseract is installed somewhere else on your system.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
# Example tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'

# The image url comes from the environment; exit with a readable message
# instead of a KeyError traceback when it is missing or empty.
target_image = os.environ.get('TARGET_IMAGE')
if not target_image:
    sys.exit('error: set the TARGET_IMAGE environment variable to the url of an image')

eprint('grabbing content from:', target_image)
# requests applies no timeout by default, so an unresponsive server would
# hang the script forever without one.
response = requests.get(target_image, timeout=30)
# Fail here on 4xx/5xx responses; otherwise the error page would be saved as
# test.png and only surface later as a confusing image decoding error.
response.raise_for_status()
with open('test.png', 'wb') as f:
    f.write(response.content)
eprint('success')

eprint('running ocr on test.png...')
# Open the image in a with-block so its file handle is closed once the
# recognized text has been printed to stdout.
with Image.open('test.png') as image:
    print(pytesseract.image_to_string(image))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment