Skip to content

Instantly share code, notes, and snippets.

@davidkwast
Last active March 2, 2018 02:09
Show Gist options
  • Save davidkwast/6d1c5a7164062c501e680cf04d7ed738 to your computer and use it in GitHub Desktop.
Save davidkwast/6d1c5a7164062c501e680cf04d7ed738 to your computer and use it in GitHub Desktop.
Simple Python captcha reader using Tesseract OCR
from PIL import Image
import sys
import pyocr
import pyocr.builders
# Pick the OCR backend. pyocr returns the available tools in the recommended
# order of usage, so the first entry is the preferred one.
tools = pyocr.get_available_tools()
if not tools:
    # sys.exit() with a string writes it to stderr and exits with status 1.
    sys.exit("No OCR tool found")

tool = tools[0]
print("Will use tool '%s'" % (tool.get_name()))
# Ex: Will use tool 'libtesseract'
# https://github.com/ldong/captcha-decoder
from PIL import Image,ImageEnhance,ImageFilter
def iterate(im, iteration):
    """Clean up an image with repeated filtering, then convert it to mode '1'.

    Each pass applies a median filter followed by a contrast enhancement
    with factor 2. After the last pass the image is converted to mode
    ``'1'``.

    Args:
        im: Image to process. It must provide ``convert`` and, when
            ``iteration`` is positive, ``filter`` (a PIL image in this
            script).
        iteration: Number of filter/contrast passes to run, as a
            non-negative integer. ``0`` performs only the final conversion.

    Returns:
        The result of ``convert('1')`` on the processed image.

    Raises:
        ValueError: If ``iteration`` is negative. The previous recursive
            form never reached its base case for such input.
    """
    if iteration < 0:
        raise ValueError("iteration must be >= 0, got %r" % (iteration,))

    # A bounded loop replaces the recursion: same passes, no stack growth.
    for _ in range(iteration):
        im = im.filter(ImageFilter.MedianFilter())
        im = ImageEnhance.Contrast(im).enhance(2)
    return im.convert('1')
import sys
# The captcha image path is the first command-line argument; fail with a
# usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s IMAGE_PATH" % sys.argv[0])

# Preprocess inside the context manager so the underlying image file is
# closed once the cleaned-up copy has been produced.
with Image.open(sys.argv[1]) as source_image:
    prepared = iterate(source_image, 3)

txt = tool.image_to_string(
    prepared,
    # No ``lang`` is passed, so the OCR tool uses its own default language.
    builder=pyocr.builders.TextBuilder(),
)

print()
print('---- OCR: ----')
print(txt)
print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment