Skip to content

Instantly share code, notes, and snippets.

Created March 4, 2017 16:50
Show Gist options
  • Save Van1996/4c916a2e1599902b8f829ed3adbf1227 to your computer and use it in GitHub Desktop.
Save Van1996/4c916a2e1599902b8f829ed3adbf1227 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
__author__ = 'West'
import os
from PIL import Image, ImageDraw
from collections import namedtuple
import urllib.request
# 巧妙的使用namedtuple,定义一种tuple类型,包含'Image','Charact'
from collections import namedtuple
# Record pairing a single-character image with the character it shows.
ImageMap = namedtuple('ImageMap', ['Image', 'Charact'])
# Working directories, resolved against the current working directory at
# import time. os.path.join keeps the separator correct on every platform
# (on Windows the resulting strings are unchanged).
_orcRoot = os.path.join(os.path.abspath('.'), 'orc')
srcPath = os.path.join(_orcRoot, 'src')
truePath = os.path.join(_orcRoot, 'true')
trainPath = os.path.join(_orcRoot, 'data')
testPath = os.path.join(_orcRoot, 'test')
# List of ImageMap records (single-character image -> character).
# Stays None until loadTrainData() fills it.
trainMap = None
# Decide whether a pixel counts as blue.
def isBlue(color):
    """Return 1 if the pixel counts as blue, otherwise 0.

    The check is on the channel sum only: a pixel passes when its first
    three channels add up to exactly 153, whatever the individual values.

    :param color: indexable pixel value with at least three numeric channels.
    :return: 1 or 0 (int, so callers can compare with ``== 1``).
    """
    return 1 if (color[0] + color[1] + color[2]) == 153 else 0
# Decide whether a pixel counts as black.
def isBlack(color):
    """Return 1 if the pixel counts as black, otherwise 0.

    A pixel is treated as black when its first three channels add up to
    100 or less.

    :param color: indexable pixel value with at least three numeric channels.
    :return: 1 or 0 (int, so results can be compared with each other).
    """
    return 1 if (color[0] + color[1] + color[2]) <= 100 else 0
# Convert a captcha picture to pure black and white.
def removeBackground(picName):
    """Open a picture, crop it and reduce it to black-on-white.

    The image is cropped to the box ``(5, 1, 55, height - 1)`` and converted
    to RGB. Every pixel that isBlue() accepts becomes black; every other
    pixel becomes white.

    :param picName: path (or file object) of the picture to open.
    :return: the cropped, binarised RGB image.
    """
    # NOTE(review): the right-hand side of this assignment was missing in the
    # reviewed copy; Image.open(picName) is the reconstruction - confirm.
    img = Image.open(picName)
    img = img.crop((5, 1, 55, img.size[1] - 1))
    img = img.convert('RGB')
    width, height = img.size
    for x in range(width):
        for y in range(height):
            if isBlue(img.getpixel((x, y))) == 1:
                img.putpixel((x, y), (0, 0, 0))
            else:
                # Without this branch the white write would also overwrite
                # the black pixels and blank the whole image.
                img.putpixel((x, y), (255, 255, 255))
    return img
# Copy a partial region of an image.
def copyImage(img, beginPoint):
    """Copy a quarter-width, full-height region of ``img`` into a new image.

    The region is ``img.size[0] // 4`` pixels wide and ``img.size[1]`` pixels
    high, starting at ``beginPoint``.

    :param img: source image providing ``size`` and ``getpixel``.
    :param beginPoint: ``(x, y)`` of the region's top-left corner in ``img``.
    :return: a new RGB image holding the copied pixels.
    """
    width, height = img.size
    width = width // 4
    # NOTE(review): the constructor call was truncated in the reviewed copy;
    # Image.new('RGB', size) is the reconstruction - confirm.
    tempImg = Image.new('RGB', (width, height))
    originX, originY = beginPoint[0], beginPoint[1]
    for x in range(width):
        for y in range(height):
            tempImg.putpixel((x, y), img.getpixel((originX + x, originY + y)))
    return tempImg
# Split an image into 4 parts.
def spliteImage(img):
    """Cut ``img`` into four equal-width vertical strips, left to right.

    Each strip is produced by copyImage() starting at x offsets 0, w, 2w
    and 3w, where ``w`` is a quarter of the image width (integer division).

    :param img: source image providing ``size``.
    :return: list of the four strip images, in left-to-right order.
    """
    quarter = img.size[0] // 4
    return [copyImage(img, (quarter * index, 0)) for index in range(4)]
# Open a picture and split it into 4 parts.
def spliteImageFile(filename):
    """Open the picture at ``filename`` and return its four strips.

    :param filename: path of the picture to open.
    :return: the list returned by spliteImage() for the opened picture.
    """
    # NOTE(review): the right-hand side of this assignment was missing in the
    # reviewed copy; Image.open(filename) is the reconstruction - confirm.
    img = Image.open(filename)
    return spliteImage(img)
# Export black-and-white versions of the source pictures to the train folder.
def srcToTrain():
    """Binarise every picture in ``srcPath`` and store it under ``trainPath``.

    Each output file is named after the first four characters of the source
    file name, with a ``.jpg`` extension.
    """
    for filename in os.listdir(srcPath):
        img = removeBackground(os.path.join(srcPath, filename))
        target = os.path.join(trainPath, filename[0:4] + '.jpg')
        # NOTE(review): the reviewed copy computed the target path but never
        # wrote the image; saving it here is the reconstruction - confirm.
        img.save(target)
# Load the training data into the module-level mapping table trainMap.
def loadTrainData():
    """Build ``trainMap`` from the pictures in ``trainPath``.

    Every picture is split into four strips; strip ``i`` is labelled with
    character ``i`` of the picture's file name. The resulting ImageMap
    records replace the module-level ``trainMap``, and the number of records
    is printed.
    """
    global trainMap
    samples = []
    for filename in os.listdir(trainPath):
        pieces = spliteImageFile(os.path.join(trainPath, filename))
        for index, piece in enumerate(pieces):
            # The record has to be stored, otherwise trainMap stays empty.
            samples.append(ImageMap(piece, filename[index]))
    trainMap = samples
    print('len of trainMap is', len(trainMap))
# Recognise one single character.
# img: picture containing the one character to recognise
# map: records whose Image is the picture data and Charact the matching character
def getSingleCharOcr(img, map):
    """Return the character whose sample image differs least from ``img``.

    Each sample is compared pixel by pixel over the overlapping area, using
    isBlack() on both sides, and the number of disagreeing pixels is counted.
    Samples whose width differs from ``img`` by more than 2 pixels are
    skipped.

    :param img: image providing ``size`` and ``getpixel``.
    :param map: iterable of records with ``Image`` and ``Charact`` attributes.
    :return: the ``Charact`` of the best sample, or ``'#'`` when no sample
        has fewer than ``width * height`` differing pixels.
    """
    result = '#'
    width, height = img.size
    minDiffPixelNum = width * height
    for IM in map:
        image = IM.Image
        if abs(width - image.size[0]) > 2:
            continue
        widthmin = min(width, image.size[0])
        heightmin = min(height, image.size[1])
        count = 0
        for x in range(widthmin):
            for y in range(heightmin):
                if isBlack(image.getpixel((x, y))) != isBlack(img.getpixel((x, y))):
                    count += 1
            # This sample can no longer beat the current best; stop early.
            if count >= minDiffPixelNum:
                break
        if count < minDiffPixelNum:
            minDiffPixelNum = count
            result = IM.Charact
    return result
# Get the verification code shown in a picture, four characters in total.
def getAllOcr(filename):
    """Recognise the four characters in the picture at ``filename``.

    The picture is binarised with removeBackground(), split with
    spliteImage(), and each strip is matched against the module-level
    ``trainMap``. ``trainMap`` must have been loaded (see loadTrainData())
    before calling this function.

    :param filename: path of the captcha picture.
    :return: the recognised characters joined into one string.
    """
    img = removeBackground(filename)
    return ''.join(getSingleCharOcr(piece, trainMap) for piece in spliteImage(img))
def initOrc():
    """Load the training samples unless ``trainMap`` is already populated."""
    # NOTE(review): the body of this condition was missing in the reviewed
    # copy; loadTrainData() is the only function that sets trainMap.
    if trainMap is None:
        loadTrainData()
if __name__ == '__main__':
    # Test run: download ten captcha pictures and recognise each one.
    # getAllOcr() matches against trainMap, so the samples are loaded first.
    initOrc()
    checkImagePath = os.path.join(os.path.abspath('.'), 'orc', 'checkCodeImg.jpg')
    for i in range(10):
        # NOTE(review): the URL literal is empty in the reviewed copy and
        # urllib rejects it with ValueError; supply the captcha endpoint.
        request = urllib.request.Request('')
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            checkcodePicture = response.read()
        with open(checkImagePath, 'wb') as f:
            f.write(checkcodePicture)
        string = getAllOcr(checkImagePath)
        # Optional follow-up that was disabled in the original: re-open the
        # downloaded picture and save it under testPath, named after the
        # recognised text with a .gif extension.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment