laobubu/@Stupid-CAPTCHA-OCR.md

## @Stupid-CAPTCHA-OCR.md

      
    Raw
  

              @Stupid-CAPTCHA-OCR.md
            
          
    一个超级简单的验证码识别。据说准确率比深度学习还要高哦（ε=ε=ε=┏(゜v゜;)┛
依赖：pip install Pillow
来源是 V2EX 上被喷惨的帖子：
https://www.v2ex.com/t/354318
Author: laobubu
（纯粹个人兴趣掐着表写的，没收钱。一个小时内写完。PIL 的文档真难看。）
测试效果：


## test.py
from PIL import Image, ImageChops

# uncomment these lines to generate `_chars2`
# _chars = [ Image.open("ch/%d.png"%i).convert('L') for i in range(10) ]
# _chars2 = [ c.tobytes('raw').replace('\x00','0').replace('\xff','1') for c in _chars ]

# Reference glyphs for the digits 0-9 (list index == digit). Each string holds
# 60 characters: 10 rows of 6 pixels in row-major order, where '1' is a white
# (255) pixel and '0' is a black (0) pixel.
_chars2 = [
    '001110011011011011110011110011110011110011110110110110011100',
    '000011001111011110000110000110000110000110001100001100001100',
    '001110011011011011000011000011000110001100011000110000111111',
    '001110011011000011000011001110000010000011000011110110011110',
    '000011000111001110011110010110110110100110111111001100001100',
    '001111001100001100011000011110000011000011110011110010011100',
    '001110011011010000110110111011110011110011110011110110011100',
    '111111000011000110000110001100001100001100011000011000011000',
    '001110011011011011011011001100010110110110110110110110011100',
    '001110011011110011110011110011110111011011000110110110011100'
]

# Decode every template into a 6x10 (width x height) mode 'L' image.
# Image.frombytes needs a real bytes object; going through bytearray yields
# one on both Python 2 and Python 3 (a str with '\x00'/'\xFF' characters is
# rejected on Python 3).
_chars = [
    Image.frombytes('L', (6, 10), bytes(bytearray(255 if bit == '1' else 0 for bit in s)))
    for s in _chars2
]

def getch(im):
    """Return the digit whose template is closest to the glyph image `im`.

    `im` is compared against every template in `_chars` with
    ImageChops.difference, so it must have the same mode and size as the
    templates (mode 'L', 6x10). The score of a template is the number of
    pixels that differ from it; the lowest score wins and ties go to the
    smaller digit.

    Returns the digit as a one-character string ('0'-'9').
    """
    best_digit = '1'
    best_diff = 99999
    for digit, template in enumerate(_chars):
        delta = ImageChops.difference(im, template).tobytes('raw')
        # bytearray yields integers on both Python 2 and Python 3, whereas
        # ord() on the elements of a bytes object fails on Python 3.
        mismatches = sum(1 for pix in bytearray(delta) if pix)
        if mismatches < best_diff:
            best_digit = str(digit)
            best_diff = mismatches
    return best_digit


def handle(url):
    """Recognize the four digits of the CAPTCHA image stored at `url`.

    `url` is passed straight to Image.open, so it is a file path or a file
    object. The image is trimmed at its border, converted to HSV and reduced
    to a black/white mask that keeps only pixels whose hue lies strictly
    between 170 and 199 and whose saturation is above 253. Four 6x10 glyphs
    are then cut from the mask at x = 4, 11, 18, 25 (y = 4..14) and each one
    is classified with getch().

    Returns the recognized digits as a four-character string.
    """
    # Image.open keeps the underlying file open; the context manager closes
    # it as soon as the pixel data has been cropped and converted.
    with Image.open(url) as source:
        w, h = source.size
        hsv = source.crop((1, 1, w-2, h-2)).convert('HSV')

    hue, saturation = hsv.split()[:2]
    hue_mask = hue.point(lambda v: 255 if 170 < v < 199 else 0)
    sat_mask = saturation.point(lambda v: 255 if v > 253 else 0)

    # Python's `and` on two images just evaluates to the second operand and
    # would silently drop the hue mask. ImageChops.darker takes the per-pixel
    # minimum, which for 0/255 masks is the intended logical AND.
    mask = ImageChops.darker(hue_mask, sat_mask)

    glyphs = [mask.crop((x, 4, x + 6, 14)) for x in range(4, 4 + 4 * 7, 7)]

    return ''.join(getch(glyph) for glyph in glyphs)

# Run the recognizer on the sample images 1.png .. 5.png in the working
# directory and print one "<file> is <digits>" line per image.
for i in range(1, 6):
    fn = "%d.png" % i
    print(" is ".join((fn, handle(fn))))
	from PIL import Image, ImageChops

	# uncomment these lines to generate `_chars2`
	# _chars = [ Image.open("ch/%d.png"%i).convert('L') for i in range(10) ]
	# _chars2 = [ c.tobytes('raw').replace('\x00','0').replace('\xff','1') for c in _chars ]

	# Reference glyphs for the digits 0-9 (list index == digit). Each string holds
	# 60 characters: 10 rows of 6 pixels in row-major order, where '1' is a white
	# (255) pixel and '0' is a black (0) pixel.
	_chars2 = [
	    '001110011011011011110011110011110011110011110110110110011100',
	    '000011001111011110000110000110000110000110001100001100001100',
	    '001110011011011011000011000011000110001100011000110000111111',
	    '001110011011000011000011001110000010000011000011110110011110',
	    '000011000111001110011110010110110110100110111111001100001100',
	    '001111001100001100011000011110000011000011110011110010011100',
	    '001110011011010000110110111011110011110011110011110110011100',
	    '111111000011000110000110001100001100001100011000011000011000',
	    '001110011011011011011011001100010110110110110110110110011100',
	    '001110011011110011110011110011110111011011000110110110011100'
	]

	# Decode every template into a 6x10 (width x height) mode 'L' image.
	# Image.frombytes needs a real bytes object; going through bytearray yields
	# one on both Python 2 and Python 3 (a str with '\x00'/'\xFF' characters is
	# rejected on Python 3).
	_chars = [
	    Image.frombytes('L', (6, 10), bytes(bytearray(255 if bit == '1' else 0 for bit in s)))
	    for s in _chars2
	]

	def getch(im):
	    """Return the digit whose template is closest to the glyph image `im`.

	    `im` is compared against every template in `_chars` with
	    ImageChops.difference, so it must have the same mode and size as the
	    templates (mode 'L', 6x10). The score of a template is the number of
	    pixels that differ from it; the lowest score wins and ties go to the
	    smaller digit.

	    Returns the digit as a one-character string ('0'-'9').
	    """
	    best_digit = '1'
	    best_diff = 99999
	    for digit, template in enumerate(_chars):
	        delta = ImageChops.difference(im, template).tobytes('raw')
	        # bytearray yields integers on both Python 2 and Python 3, whereas
	        # ord() on the elements of a bytes object fails on Python 3.
	        mismatches = sum(1 for pix in bytearray(delta) if pix)
	        if mismatches < best_diff:
	            best_digit = str(digit)
	            best_diff = mismatches
	    return best_digit



	def handle(url):
	    """Recognize the four digits of the CAPTCHA image stored at `url`.

	    `url` is passed straight to Image.open, so it is a file path or a file
	    object. The image is trimmed at its border, converted to HSV and reduced
	    to a black/white mask that keeps only pixels whose hue lies strictly
	    between 170 and 199 and whose saturation is above 253. Four 6x10 glyphs
	    are then cut from the mask at x = 4, 11, 18, 25 (y = 4..14) and each one
	    is classified with getch().

	    Returns the recognized digits as a four-character string.
	    """
	    # Image.open keeps the underlying file open; the context manager closes
	    # it as soon as the pixel data has been cropped and converted.
	    with Image.open(url) as source:
	        w, h = source.size
	        hsv = source.crop((1, 1, w-2, h-2)).convert('HSV')

	    hue, saturation = hsv.split()[:2]
	    hue_mask = hue.point(lambda v: 255 if 170 < v < 199 else 0)
	    sat_mask = saturation.point(lambda v: 255 if v > 253 else 0)

	    # Python's `and` on two images just evaluates to the second operand and
	    # would silently drop the hue mask. ImageChops.darker takes the per-pixel
	    # minimum, which for 0/255 masks is the intended logical AND.
	    mask = ImageChops.darker(hue_mask, sat_mask)

	    glyphs = [mask.crop((x, 4, x + 6, 14)) for x in range(4, 4 + 4 * 7, 7)]

	    return ''.join(getch(glyph) for glyph in glyphs)

	# Run the recognizer on the sample images 1.png .. 5.png in the working
	# directory and print one "<file> is <digits>" line per image.
	for i in range(1, 6):
	    fn = "%d.png" % i
	    print(fn + " is " + handle(fn))