Skip to content

Instantly share code, notes, and snippets.

@xavierskip
Created June 1, 2016 07:51
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save xavierskip/95a2a749ceb47ad4eb1ff6d1f8d73c5c to your computer and use it in GitHub Desktop.
Save xavierskip/95a2a749ceb47ad4eb1ff6d1f8d73c5c to your computer and use it in GitHub Desktop.
A simple example tool for cracking captchas.
#!/usr/bin/env python
# coding: utf-8
import requests
import StringIO
from PIL import Image
import os
import time
# Characters img2str() uses to render one pixel: YES for a black pixel, NO otherwise.
YES = 'X'
NO = '-'
# Pixel values written by binarization() and tested by the projection/cleanup helpers.
BLACK = 0
WHITE = 255
# Directory containing this script; models_match() looks for its 'models' folder here.
HERE = os.path.dirname(os.path.abspath(__file__))
# Binarization cut-off that train() and hack_captcha() pass to binarization().
THRESHOLD = 135 # calculated manually by find_threshold()
# NOTE(review): CAPTCHA_URL and JZWJW_CAPTCHA_URL are referenced further down
# but are not defined anywhere in the visible file -- confirm where they come from.
def get_img(url, **kwargs):
    """ download image from url

    url: address of the image to fetch
    kwargs: passed through unchanged to requests.get()
    return: Image object decoded from the response body
    raises: requests.HTTPError when the server answers with a 4xx/5xx status
    """
    r = requests.get(url, **kwargs)
    # Fail loudly on an HTTP error instead of handing an error page to PIL,
    # which would only complain that it cannot identify the image.
    r.raise_for_status()
    return Image.open(StringIO.StringIO(r.content))
def grey_img(image):
    """ pure-Python RGB -> greyscale conversion

    Slow; binarization() uses Image.convert('L') instead.

    image: Image object whose pixels are (r, g, b) triples
    return: new mode "L" Image object of the same size
    """
    gray_image = Image.new('L', image.size)
    # Standard luma weights 0.299 / 0.587 / 0.114, done in integer arithmetic
    # (+500 rounds to nearest) so pure white maps to exactly 255.
    gray_data = [
        (299 * r + 587 * g + 114 * b + 500) // 1000
        for r, g, b in image.getdata()
    ]
    gray_image.putdata(gray_data)
    return gray_image
def binarization(image, threshold=127):
    """
    Turn an image into a two-level (BLACK / WHITE) greyscale image.

    The input is first converted to mode "L"; every pixel darker than
    `threshold` becomes BLACK and every other pixel becomes WHITE.

    image: Image object
    threshold: int
    return: new mode "L" (luminance) Image object; `image` is left untouched
    """
    grey = image.convert('L')
    levels = [BLACK if level < threshold else WHITE for level in grey.getdata()]
    grey.putdata(levels)
    return grey
def xprojection(img, box=()):
    """
    Count the BLACK pixels in every column of a region.

    img: Image object (pixels are compared against BLACK)
    box: (left, upper, right, lower) region of the img; the whole image
         when empty
    return: list with one count per column, index 0 being column `left`
    """
    if box:
        left, upper, right, lower = box
    else:
        left, upper = 0, 0
        right, lower = img.size
    # Building the list column by column keeps the result relative to the
    # box, so a region that does not start at x == 0 is handled correctly.
    return [
        sum(1 for y in range(upper, lower) if img.getpixel((x, y)) == BLACK)
        for x in range(left, right)
    ]
def yprojection(img, box=()):
    """
    Count the BLACK pixels in every row of a region.

    img: Image object (pixels are compared against BLACK)
    box: (left, upper, right, lower) region of the img; the whole image
         when empty
    return: list with one count per row, index 0 being row `upper`
    """
    if box:
        left, upper, right, lower = box
    else:
        left, upper = 0, 0
        right, lower = img.size
    # Building the list row by row keeps the result relative to the box,
    # so a region that does not start at y == 0 is handled correctly.
    return [
        sum(1 for x in range(left, right) if img.getpixel((x, y)) == BLACK)
        for y in range(upper, lower)
    ]
def continuous_lines(segment, single=0):
    """ partition projection data into ranges of non-zero counts

    segment: list produced by xprojection() or yprojection()
    single: boolean
        true  -> return one [start, end] pair spanning the first to the
                 last non-zero entry ([0, 0] when there is none)
        false -> return a list of [start, end] pairs, one per unbroken
                 run of non-zero entries
    `end` is exclusive in both modes, so segment[start:end] is the run.
    """
    length = len(segment)
    if single:
        start = next((i for i, v in enumerate(segment) if v > 0), None)
        if start is None:  # nothing but zeros (or an empty list)
            return [0, 0]
        trailing = next(i for i, v in enumerate(reversed(segment)) if v > 0)
        return [start, length - trailing]

    areas = []
    start = None  # index where the currently open run began
    for i, count in enumerate(segment):
        if count > 0:
            if start is None:
                start = i
        elif start is not None:
            areas.append([start, i])
            start = None
    # A run that reaches the end of the data is closed here; this also
    # covers a one-wide run sitting on the very last index.
    if start is not None:
        areas.append([start, length])
    return areas
def lonely_pixel(pixs, pos):
    """
    Tell whether none of the 8 neighbours of a pixel is BLACK.

    Neighbours that fall outside the image count as white.

    pixs: image.load()
    pos: (x,y)
    return: True when no neighbour is BLACK, else False
    """
    x, y = pos
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            if dx == 0 and dy == 0:
                continue  # pixel self
            nx, ny = x + dx, y + dy
            # Negative coordinates are rejected explicitly: Pillow's pixel
            # access reads them as offsets from the opposite edge instead of
            # raising IndexError, which would make border pixels "see" the
            # far side of the image.
            if nx < 0 or ny < 0:
                continue
            try:
                neighbour = pixs[nx, ny]
            except IndexError:
                continue  # past the right/bottom edge
            if neighbour == BLACK:
                return False
    return True
def minesweeper(img):
    """ remove isolated black pixels

    The image is walked in 2x2 cells. When a cell holds exactly one BLACK
    pixel and lonely_pixel() confirms it has no BLACK neighbour, that pixel
    is set to WHITE.

    img: Image object, must be mode "L"; modified in place
    return: the same Image object
    """
    def _sole_black(pixels, origin):
        # Position of the only BLACK pixel in the 2x2 cell at `origin`,
        # or None when the cell holds zero or more than one.
        ox, oy = origin
        found = []
        for dx in (0, 1):
            for dy in (0, 1):
                try:
                    value = pixels[ox + dx, oy + dy]
                except IndexError:
                    value = WHITE  # cell sticks out of the image
                if value == BLACK:
                    found.append((ox + dx, oy + dy))
                    if len(found) > 1:
                        return None
        return found[0] if found else None

    pixels = img.load()
    width, height = img.size
    for cell_x in range(0, width, 2):
        for cell_y in range(0, height, 2):
            candidate = _sole_black(pixels, (cell_x, cell_y))
            if candidate is not None and lonely_pixel(pixels, candidate):
                pixels[candidate] = WHITE
    return img
def img_split(img):
    """
    Cut the image into one tightly cropped region per run of columns that
    contain BLACK pixels.

    Column runs come from xprojection(); each run is then trimmed
    vertically using the yprojection() of that column range.

    img: Image object
    return: list of cropped Image objects, ordered left to right
    """
    height = img.size[1]
    pieces = []
    for left, right in continuous_lines(xprojection(img)):
        rows = yprojection(img, (left, 0, right, height))
        top, bottom = continuous_lines(rows, 1)
        pieces.append(img.crop((left, top, right, bottom)))
    return pieces
def img2str(img):
    """
    Render the image as text, one line per pixel row.

    A BLACK pixel becomes YES, any other pixel becomes NO; every row is
    terminated by a newline.

    img: Image object
    return: string
    """
    width, height = img.size
    rows = []
    for y in range(height):
        row = ''.join(
            YES if img.getpixel((x, y)) == BLACK else NO
            for x in range(width)
        )
        rows.append('%s\n' % row)
    return ''.join(rows)
def img_compare(imga, imgb):
    """
    Score how closely the black-pixel projections of two images agree.

    The column and row projections of both images are compared entry by
    entry over the extent of imga; where imgb has no matching entry the
    full imga count is taken as the difference. The summed difference is
    halved and divided by the pixel count of imga.

    :imga source img
    :imgb model img
    return float, 1.0 when no difference was found, lower otherwise
    """
    def _difference(source, model):
        # Entries of `source` beyond the end of `model` count in full.
        known = len(model)
        return sum(
            abs(n - model[i]) if i < known else n
            for i, n in enumerate(source)
        )

    total = _difference(xprojection(imga), xprojection(imgb))
    total += _difference(yprojection(imga), yprojection(imgb))
    width, height = imga.size
    return 1 - total / 2.0 / (width * height)
def models_match(img):
    """
    Find the model image that img_compare() rates closest to img.

    Models are the *png files in the 'models' directory next to this
    script. A model scoring exactly 1.0 is returned immediately; otherwise
    the first model with the highest score wins.

    img: Image object
    return str, model name (file name without extension)
    raises ValueError when the models directory holds no png file
    """
    models_path = os.path.join(HERE, 'models')
    filenames = [n for n in os.listdir(models_path) if n.endswith('png')]
    if not filenames:
        raise ValueError('no model images (*png) found in %s' % models_path)

    best_name, best_score = None, None
    for filename in filenames:
        model_img = Image.open(os.path.join(models_path, filename))
        try:
            score = img_compare(img, model_img)
        finally:
            # Image.open keeps the file handle; release it per model.
            model_img.close()
        name = os.path.splitext(filename)[0]
        if score == 1.0:
            return name
        # Strict '>' keeps the first of several equally good models.
        if best_score is None or score > best_score:
            best_name, best_score = name, score
    return best_name
def gen_models():
    """
    Write a text rendering (see img2str) next to every model image.

    For each *png file in the 'models' directory beside this script -- the
    same directory models_match() reads -- a '<name>.txt' file is written.
    """
    models_path = os.path.join(HERE, 'models')
    for filename in os.listdir(models_path):
        if not filename.endswith('png'):
            continue
        img = Image.open(os.path.join(models_path, filename))
        try:
            content = img2str(img)
        finally:
            img.close()
        # The text file is opened only after the image was read, so a broken
        # image does not leave an empty .txt behind.
        name = os.path.splitext(filename)[0]
        with open(os.path.join(models_path, '%s.txt' % name), 'w') as txt:
            txt.write(content)
def find_threshold():
    """ helper for choosing THRESHOLD by eye

    Downloads one captcha and saves its binarization for every threshold
    from 80 to 199 as 'training/<threshold>.png'.

    NOTE(review): JZWJW_CAPTCHA_URL is not defined in the visible file.
    """
    source = get_img(JZWJW_CAPTCHA_URL)
    for level in range(80, 200):
        candidate = binarization(source, level)
        target = os.path.join('training', '%s.png' % level)
        candidate.save(target)
        candidate.close()
def train():
    """ test for img_split

    Downloads one captcha, binarizes and cleans it, then saves the cleaned
    image and each region returned by img_split() into 'training', using
    the current time as the file name stem.

    NOTE(review): CAPTCHA_URL is not defined in the visible file.
    """
    captcha = get_img(CAPTCHA_URL)
    cleaned = minesweeper(binarization(captcha, THRESHOLD))
    stamp = time.time()
    cleaned.save(os.path.join('training', '%s.png' % stamp))
    for index, piece in enumerate(img_split(cleaned)):
        piece.save(os.path.join('training', '%s-%s.png' % (stamp, index)))
    cleaned.close()
def hack_captcha(img, n=4):
    """ guess the captcha code; the main entry point of this module

    The image is binarized with THRESHOLD, cleaned by minesweeper(), split
    by img_split(), and every region is named by models_match().

    img: Image object
    n: maximum number of characters to return
    return str, the matched model names joined together, cut to n characters
    """
    cleaned = minesweeper(binarization(img, THRESHOLD))
    guesses = [models_match(piece) for piece in img_split(cleaned)]
    return ''.join(guesses)[:n]
def main():
    """ test for hack_captcha()

    Shows 10 captchas together with the guessed code, asks after each one
    whether the guess was right, and prints the number of confirmed guesses.

    NOTE(review): JZWJW_CAPTCHA_URL is not defined in the visible file.
    """
    count = 0
    for _ in range(10):
        captcha = get_img(JZWJW_CAPTCHA_URL)
        captcha.show()
        print(hack_captcha(captcha))
        # raw_input() strips the trailing newline, so a bare Enter arrives
        # as '' and never as '\n'; accept that as well as an explicit yes.
        answer = raw_input('Y/N').strip().lower()
        if answer in ('', 'y', 'yes'):
            count += 1
    print(count)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment