Last active
August 29, 2015 14:10
-
-
Save gjuric/e173bb601085cb68272b to your computer and use it in GitHub Desktop.
Python captcha solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# apt-get install libjpeg-dev libpng-dev
# apt-get install tesseract-ocr libtesseract3
# apt-get install python-pip python-dev
# Or install pip manually: wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py
# /usr/local/bin/pip install pillow
# Download and unpack pytesser from https://code.google.com/p/pytesser/
# Edit pytesser.py and change "import Image" to "from PIL import Image"
from PIL import Image | |
from PIL import ImageEnhance | |
from pytesser import * | |
from urllib import urlretrieve | |
def get(link, filename='temp.png'):
    """Download *link* to a local file and return the path it was saved to.

    Args:
        link: URL of the resource to fetch (any scheme urlretrieve accepts).
        filename: Destination path. Defaults to 'temp.png', the name the
            rest of this script opens, so existing ``get(link)`` calls are
            unaffected.

    Returns:
        The local path the data was written to.
    """
    # urlretrieve returns a (local_filename, headers) pair; only the path is
    # useful to callers here.
    saved_path, _headers = urlretrieve(link, filename)
    return saved_path
# Fetch the captcha; get() stores it as temp.png in the working directory.
get('http://example.com/captcha.png')

# Upscale the download 5x with bicubic resampling and keep a copy on disk.
source = Image.open("temp.png")
width, height = source.size
upscaled = source.resize((width * 5, height * 5), Image.BICUBIC)
upscaled.save("temp2.png")

# Display a contrast-boosted copy of the original download. The enhanced
# image is only shown; it is not used by any of the later steps.
ImageEnhance.Contrast(source).enhance(1.3).show("30% more contrast")

# Binarise the upscaled copy: every pixel that is not opaque pure black is
# overwritten with opaque white.
BLACK = (0, 0, 0, 255)
WHITE = (255, 255, 255, 255)
rgba = Image.open('temp2.png').convert("RGBA")
pixels = rgba.load()
cols, rows = rgba.size
for row in range(rows):
    for col in range(cols):
        if pixels[col, row] == BLACK:
            continue
        pixels[col, row] = WHITE
rgba.save("bw.gif", "GIF")

# Reload the GIF, scale it to a fixed 116x56 using nearest-neighbour
# resampling, and write it out as a TIFF.
resized = Image.open('bw.gif').resize((116, 56), Image.NEAREST)
tif_suffix = ".tif"
resized.save("input-NEAREST" + tif_suffix)

# Run the OCR on the TIFF and print whatever text it reports.
ocr_input = Image.open('input-NEAREST.tif')
print(image_to_string(ocr_input))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment