# titaneric/OCR.py

## OCR.py
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import urllib.request
from PIL import Image
from selenium.webdriver.support.select import Select
from PIL import ImageEnhance
import matplotlib.pyplot as plt
import cv2
import numpy as np
import urllib.request
from lxml import etree
import os
import shutil
import sqlite3


## get the image source
#img = driver.find_element_by_xpath('/html/body/img')
# Project working directory. The relative file names used below (captcha.png,
# new.png, ...) only end up here when the script is started from this directory.
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')

# Remove every PNG left in the working directory by a previous run.
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)

#dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')

#for file in os.listdir(dir_path):
#    if file.endswith('.png'):
#        os.remove(dir_path + "\\" + file)

# URL that serves the captcha image.
src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
# Download the captcha into the current working directory.
urllib.request.urlretrieve(src, "captcha.png")
#driver.close()
Im = Image.open('captcha.png')
width, height = Im.size
# Despite the variable and file names, the image is enlarged by a factor of 6.
quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
quartersizedIm.save('foursized.png')
im = Image.open('foursized.png')

xsize, ysize = im.size


# Boost contrast, then brightness, and keep the intermediate result on disk.
enhancer = ImageEnhance.Contrast(im)
im = enhancer.enhance(3.0)
enhancer = ImageEnhance.Brightness(im)
im = enhancer.enhance(10.0)
xsize, ysize = im.size
im.save('enhance.png')


# Binarise in place: a pixel becomes black when channel 1 is not saturated
# while channels 2 and 3 are; every other pixel becomes white.
# NOTE(review): index 3 and the 4-tuples written back assume an RGBA image;
# a 3-channel captcha would raise IndexError here -- confirm the source format.
pix = im.load()
for y in range(0, ysize):
    for x in range(0, xsize):
        if pix[x , y][1] != 255 and pix[x , y][2] == 255 and pix[x , y][3] == 255:
            pix[x , y] = (0 , 0 , 0 , 255)
        else:
            pix[x , y] = (255 , 255 , 255 , 255)

im.save('new.png')

# Second stage: clean the binarised captcha with OpenCV and write 'perfect.png'.
# NOTE(review): a 1x1 kernel presumably makes erode/dilate no-ops -- confirm
# whether a larger kernel was intended.
kernel = np.ones((1, 1), np.uint16)
image = cv2.imread('new.png')
erosion = cv2.erode(image, kernel, iterations = 1)
blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
# Edge map of the blurred glyphs.
edged = cv2.Canny(blurred, 30, 150)
plt.imshow(edged)
#plt.show()
dilation = cv2.dilate(edged, kernel, iterations = 1)
plt.imshow(dilation)
#plt.show()
# The edge map is written and read back so that img2 is loaded by imread the
# same way as img1 before the two are added.
cv2.imwrite('process.png', dilation)
img1 = cv2.imread('new.png')
img2 = cv2.imread('process.png')
# Invert the glyph image, add the edges, then invert back below.
img1 = cv2.bitwise_not(img1)
final = cv2.add(img1, img2)

#erosion = cv2.erode(final, kernel, iterations = 1)
#blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
final = cv2.bitwise_not(final)
plt.imshow(final)
#plt.show()
#cv2.imwrite('final.png', final)
# Morphological opening with a 3x3 elliptical kernel on the grayscale image.
gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
plt.imshow(res)
#plt.show()
# Final pre-processed captcha consumed by the segmentation step.
cv2.imwrite('perfect.png', res)

def mse(imgA, imgB):
    """Return the mean squared error between two images of equal shape.

    The squared differences are summed over every element (all channels)
    and divided by the number of pixels (rows * columns) of ``imgA``.

    The previous version squared only ``imgB`` because of missing
    parentheses, which produced a meaningless (often negative) value.
    """
    diff = imgA.astype("float") - imgB.astype("float")
    err = np.sum(diff ** 2)
    err /= float(imgA.shape[0] * imgA.shape[1])
    return err
def findBorder(axis, color, leng, img):
    """Scan lines of ``img`` starting at index ``axis`` and return a border index.

    When ``leng`` equals the image height the scan walks over columns
    (each column is inspected top to bottom); otherwise it walks over rows.
    For a square image the column scan is therefore always chosen.

    * ``color == [0, 0, 0]``: return the first line holding a black pixel.
    * any other ``color``: return the first line whose ``leng`` pixels all
      equal ``color``.

    The scan has no upper bound: running past the image edge raises the
    IndexError produced by ``img.item``.
    """
    scanColumns = leng == img.shape[0]
    wantBlack = color == [0, 0, 0]
    current = axis
    while True:
        matches = 0
        for offset in range(leng):
            if scanColumns:
                row, col = offset, current
            else:
                row, col = current, offset
            if all(img.item(row, col, ch) == color[ch] for ch in range(3)):
                if wantBlack:
                    # One text pixel is enough to mark the border.
                    return current
                matches += 1
        if matches == leng:
            # The whole line has the requested (background) colour.
            return current
        current += 1
def splitWord(img, currentCol):
    """Cut the next glyph group to the right of ``currentCol`` out of ``img``.

    Returns ``(charImg, rightSide)`` where ``charImg`` is a slice of ``img``
    and ``rightSide`` is the first all-white column after the glyph; callers
    pass it back in as the next ``currentCol``.

    The top and bottom limits are searched over the full image width, not
    only over the columns of the glyph being cut.
    """
    rows, cols = img.shape[:2]
    # findBorder compares ``color`` against a list, so these must stay lists.
    textColor = [0, 0, 0]
    backgroundColor = [255, 255, 255]

    leftSide = findBorder(currentCol + 1, textColor, rows, img)
    rightSide = findBorder(leftSide, backgroundColor, rows, img)
    topSide = findBorder(0, textColor, cols, img)
    bottomSide = findBorder(topSide, backgroundColor, cols, img)

    return img[topSide:bottomSide, leftSide:rightSide], rightSide
def rotateImage(image, angle):
    """Return ``image`` rotated by ``angle`` degrees about its centre.

    The output keeps the input width and height; areas uncovered by the
    rotation are filled with white.
    """
    rows, cols = image.shape[:2]
    pivot = (cols / 2, rows / 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, matrix, (cols, rows), borderMode = cv2.BORDER_CONSTANT, borderValue = (255, 255, 255))
def makeInnerBorder(img, left, right, top, bottom, color):
    """Paint a border of ``color`` on the inside edges of ``img`` (in place).

    ``left``/``right`` are border widths in columns and ``top``/``bottom``
    are border heights in rows. Widths larger than the image are clamped.
    The same array is returned for convenience.

    The previous version painted the ``right`` and ``bottom`` borders onto
    the left and top edges again, leaving the far edges untouched.
    """
    height, width = img.shape[:2]
    left = min(left, width)
    right = min(right, width)
    top = min(top, height)
    bottom = min(bottom, height)

    # Guard against zero widths: a slice such as ``img[:, width - 0:]`` is
    # empty, but being explicit keeps the intent obvious.
    if left > 0:
        img[:, :left] = color
    if right > 0:
        img[:, width - right:] = color
    if top > 0:
        img[:top, :] = color
    if bottom > 0:
        img[height - bottom:, :] = color
    return img
def verticalProjection(img, min_col=45):
    """Return the column at which a glyph group should be cut.

    The number of black pixels is counted per column. Columns left of
    ``min_col`` are ignored; among the remaining columns the left-most one
    with the fewest black pixels is returned.

    When the image has no column at or beyond ``min_col`` the last column
    index (``width - 1``) is returned. The previous version raised KeyError
    in that case because its fallback branch could never be reached.
    """
    width = img.shape[1]
    if width <= min_col:
        return width - 1

    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
    counts = black.sum(axis=0)
    # argmin returns the first (left-most) position of the minimum.
    return min_col + int(np.argmin(counts[min_col:]))

def findRectangleBorder(img):
    """Crop ``img`` to the bounding box of its black pixels and return the slice.

    The cropping window is kept exactly as the previous per-pixel
    implementation produced it, because stored templates and the width
    thresholds elsewhere depend on it:

    * the last black row and the last black column are excluded
      (half-open window),
    * rows and columns 0 and 1 are ignored when looking for the first black
      row/column,
    * without any black pixel the result is an empty slice.

    The old loops visited every pixel in Python (the inner ``break`` never
    stopped the outer scan) and used inverted variable names; the search is
    now vectorised and named after what it finds.
    """
    height, width = img.shape[:2]
    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
    blackCols = np.flatnonzero(black.any(axis=0))
    blackRows = np.flatnonzero(black.any(axis=1))

    lastCol = int(blackCols[-1]) if blackCols.size else 0
    lastRow = int(blackRows[-1]) if blackRows.size else 0

    # The original backward scans stopped at index 2.
    innerCols = blackCols[blackCols >= 2]
    innerRows = blackRows[blackRows >= 2]
    firstCol = int(innerCols[0]) if innerCols.size else width - 1
    firstRow = int(innerRows[0]) if innerRows.size else height - 1

    return img[firstRow:lastRow, firstCol:lastCol]

def elementaryRevise(img):
    """Normalise a glyph image: crop to its black pixels, pad, resize.

    The crop is padded with a 5-pixel white frame and then resized to
    60 x 80 (width x height).
    """
    cropped = findRectangleBorder(img)
    padded = cv2.copyMakeBorder(cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value = (255, 255, 255))
    return cv2.resize(padded, (60 ,80))

def countRate(file, threshold=0.05):
    """Delete the image ``file`` when it holds too few black pixels.

    The ratio is ``black_pixels / img.size``. ``img.size`` counts every
    channel, so for a 3-channel image the ratio is one third of the black
    pixel share; the default ``threshold`` keeps the previously hard-coded
    value.

    Files that OpenCV cannot read are left alone (``cv2.imread`` returns
    None for them, which used to raise AttributeError).
    """
    img = cv2.imread(file)
    if img is None or img.size == 0:
        return

    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
    count = int(np.count_nonzero(black))
    if (count / img.size) < threshold:
        os.remove(file)

def segmentation(img):
    """Split the captcha ``img`` into glyph images written to the working directory.

    Glyph groups are cut from left to right with ``splitWord``. A group whose
    padded width is below 200 px is treated as one glyph; a wider group is
    assumed to hold touching glyphs and is split at the column returned by
    ``verticalProjection`` after trying rotations of 1..20 degrees.

    Output files (relative paths): ``<n>.png`` for each glyph, plus the
    intermediate ``rotated_<angle>_degree.png`` / ``rotated_<angle>.png``.
    Nothing is returned.
    """
    cntChar = 0
    currentCol = 0
    # Rotate by 180 degrees so that the first black column found from the
    # left corresponds to the right-most text column of the original image.
    rotateImg = rotateImage(img, 180)
    #cv2.imwrite('rotate.png', rotateImg)
    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])


    right = findBorder(0, [0,0,0], rotateImg.shape[0], rotateImg)
    # Estimated right end of the text in original coordinates.
    right = img.shape[1] - right + 2
    #print(right)
    # Keep cutting until the cursor is within 2 px of the right end.
    while abs(currentCol - right) > 2:
        #print(currentCol)
        image, currentCol = splitWord(img, currentCol)
        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
        #image = elementaryRevise(image)
        if image.shape[1] < 200:
            # Narrow enough to be a single glyph.
            image = elementaryRevise(image)
            cv2.imwrite('{cntChar}.png'.format(**locals()), image)
            cntChar += 1

        else:
            # Map each trial angle to the cut column verticalProjection returns.
            cntDict = dict()
            for i in range(1, 21):
                rotateChar = rotateImage(image, i)
                rotateChar = findRectangleBorder(rotateChar)
                #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)

                cntDict[i] = verticalProjection(rotateChar)
            # NOTE(review): this picks the angle with the smallest cut COLUMN,
            # not the smallest black-pixel count -- confirm that is intended.
            bestAngle = min(cntDict, key = cntDict.get)
            rotatedImg = rotateImage(image, bestAngle)
            rotatedImg = findRectangleBorder(rotatedImg)
            min_cut = cntDict[bestAngle]
            # Saved first because the rectangle below is drawn in place on
            # the same array (new_cut aliases rotatedImg).
            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(**locals()), rotatedImg)
            new_cut = rotatedImg
            # White out everything right of the cut: the left glyph remains.
            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

            #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]

            new_cut = rotateImage(new_cut, 360 - bestAngle)
            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            new_cut = elementaryRevise(new_cut)
            cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
            cntChar += 1
            # Reload the untouched rotated image and white out the left part.
            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(**locals()))
            remainImg = rotatedImg
            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
            #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
            #plt.imshow(remainImg)
            #plt.show()

            #the real angle to rotate back need to estimate
            remainImg = rotateImage(remainImg, 360 - bestAngle)
            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            # Handle the remaining character(s).
            # NOTE(review): a width of exactly 200 is handled by neither the
            # `< 200` branch nor the `> 200` loop.
            if remainImg.shape[1] < 200:

                remainImg = elementaryRevise(remainImg)
                cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                cntChar += 1
            # Still too wide: keep splitting with the same angle. After
            # elementaryRevise the width is 60, which ends the loop.
            # NOTE(review): termination otherwise depends on each pass
            # shrinking the remainder below 200 px -- not evident from here.
            while remainImg.shape[1] > 200:
                #cv2.imwrite('remain.png', remainImg)
                #for i in range(1, 21):
                    #rotateChar = rotateImage(remainImg, i)
                    #rotateChar = findRectangleBorder(rotateChar)
                    #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)
                rotateChar = rotateImage(remainImg, bestAngle)
                rotateChar = findRectangleBorder(rotateChar)
                cv2.imwrite('rotated_{bestAngle}.png'.format(**locals()), rotateChar)
                cntDict[bestAngle] = verticalProjection(rotateChar)
                #bestAngle = min(cntDict, key = cntDict.get)
                #rotatedImg = rotateImage(image, bestAngle)
                #rotatedImg = findRectangleBorder(rotatedImg)
                min_cut = cntDict[bestAngle]
                new_cut = rotateChar
                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

                #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]

                new_cut = rotateImage(new_cut, 360 - bestAngle)
                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

                new_cut = elementaryRevise(new_cut)
                cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
                cntChar += 1
                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(**locals()))
                remainImg = rotatedImg
                #plt.imshow(remainImg)
                #plt.show()
                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
                #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
                #plt.imshow(remainImg)
                #plt.show()


                remainImg = rotateImage(remainImg, 360 - bestAngle)
                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

                if remainImg.shape[1] < 200:
                    remainImg = elementaryRevise(remainImg)
                    cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                    cntChar += 1


            #print('rotated_{bestAngle}_degree.png'.format(**locals()), bestAngle, min_cut)


        #find the least number of text color of column
        #if cntNum < textColorNum:
        #    textColorNum = cntNum


# Segment the pre-processed captcha (padded with a 30 px white frame).
image = cv2.copyMakeBorder(cv2.imread("perfect.png"), 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
segmentation(image)

# Drop near-empty PNGs, then move the single-digit glyph files ("0.png" ..
# "9.png", i.e. names of length 5) into the data_set folder. File names are
# used relative to the current directory, so the script must run from dir_path.
for file in os.listdir(dir_path):
    if not file.endswith('.png'):
        continue
    countRate(file)
    if len(file) != 5 or not os.path.isfile(file):
        continue
    shutil.move('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\' + file,
                'C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set\\' + file)

# From here on dir_path refers to the folder holding the glyphs to label.
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')


def trainData(img):
    """Return the label of the stored sample closest to ``img``.

    Every row of table ``Data`` in ``dataset.sqlite`` is decoded and compared
    with ``img`` using ``mse``; the label of the row with the smallest error
    wins (the first one on ties).

    Returns None when the table holds no decodable sample. The previous
    version raised UnboundLocalError in that case, never closed the database
    connection, shadowed the builtin ``min`` and wrote each blob to
    ``test.png`` only to read it back.
    """
    bestErr = float('inf')
    bestLabel = None
    conn = sqlite3.connect('dataset.sqlite')
    try:
        cur = conn.cursor()
        # Same columns the INSERT statements in this file use.
        cur.execute('SELECT img, label FROM Data')
        for ablob, label in cur:
            # Decode the PNG bytes in memory, as a 3-channel BGR image like
            # cv2.imread would return.
            dataImg = cv2.imdecode(np.frombuffer(ablob, dtype=np.uint8), cv2.IMREAD_COLOR)
            if dataImg is None:
                # Corrupt or non-image blob: ignore it.
                continue
            err = mse(dataImg, img)
            if err < bestErr:
                bestErr = err
                bestLabel = label
    finally:
        conn.close()
    return bestLabel

def mse(img1, img2):
    """Return the mean squared error between two images of equal shape.

    Squared differences are summed over all elements and divided by the
    number of pixels (rows * columns) of ``img1``.
    """
    delta = img1.astype('float') - img2.astype('float')
    pixelCount = float(img1.shape[0] * img1.shape[1])
    return np.sum(delta ** 2) / pixelCount

# Interactive labelling: show each glyph, print the guessed label and store
# the glyph with that label when the user confirms with 'y'.
for file in os.listdir(dir_path):
    imgFile = cv2.imread(dir_path + "\\" + file)
    if imgFile is None:
        # Not a readable image (cv2.imread returns None); nothing to label.
        continue
    plt.imshow(imgFile)
    plt.show()
    char = trainData(imgFile)
    print(char)
    judge = input()
    if judge == 'y':
        # ``with`` / ``finally`` release the file and the connection even
        # when the insert fails.
        with open(dir_path + "\\" + file, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('INSERT INTO Data(img, label) VALUES(?, ?)',
                        (sqlite3.Binary(ablob), char.upper()))
            conn.commit()
        finally:
            conn.close()

# The glyph images are no longer needed once they have been reviewed.
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)


def productTestData():
    """Ask the user for the label of every image in ``dir_path`` and store it.

    Each readable image is displayed, the user types its label, and the image
    bytes are inserted with the upper-cased label into table ``Data`` of
    ``dataset.sqlite``. Empty or whitespace-only answers skip the image; the
    previous ``not target.isspace()`` test let the empty string through and
    stored an empty label.
    """
    for file in os.listdir(dir_path):
        path = dir_path + "\\" + file
        imgFile = cv2.imread(path)
        if imgFile is None:
            # Not a readable image; nothing to show or label.
            continue
        plt.imshow(imgFile)
        plt.show()
        target = input("The target is ")
        if not target.strip():
            continue
        with open(path, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('INSERT INTO Data(img, label) VALUES(?, ?)',
                        (sqlite3.Binary(ablob), target.upper()))
            conn.commit()
        finally:
            # Close the connection even when the insert fails.
            conn.close()


#sm = browser.find_element_by_name('ibnSubmit').click()
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	import urllib.request
	from PIL import Image
	from selenium.webdriver.support.select import Select
	from PIL import ImageEnhance
	import matplotlib.pyplot as plt
	import cv2
	import numpy as np
	import urllib.request
	from lxml import etree
	import os
	import shutil
	import sqlite3
















	## get the image source
	#img = driver.find_element_by_xpath('/html/body/img')
	# Restored nesting of the flattened copy; statements are unchanged.
	# Project working directory; PNGs left by a previous run are removed.
	dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')

	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        os.remove(dir_path + "\\" + file)

	src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
	# Download the captcha and enlarge it by a factor of 6.
	urllib.request.urlretrieve(src, "captcha.png")
	Im = Image.open('captcha.png')
	width, height = Im.size
	quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
	quartersizedIm.save('foursized.png')
	im = Image.open('foursized.png')

	xsize, ysize = im.size

	# Boost contrast, then brightness.
	enhancer = ImageEnhance.Contrast(im)
	im = enhancer.enhance(3.0)
	enhancer = ImageEnhance.Brightness(im)
	im = enhancer.enhance(10.0)
	xsize, ysize = im.size
	im.save('enhance.png')

	# Binarise in place; index 3 assumes an RGBA image.
	pix = im.load()
	for y in range(0, ysize):
	    for x in range(0, xsize):
	        if pix[x , y][1] != 255 and pix[x , y][2] == 255 and pix[x , y][3] == 255:
	            pix[x , y] = (0 , 0 , 0 , 255)
	        else:
	            pix[x , y] = (255 , 255 , 255 , 255)

	im.save('new.png')

	# Clean the binarised captcha with OpenCV and write 'perfect.png'.
	kernel = np.ones((1, 1), np.uint16)
	image = cv2.imread('new.png')
	erosion = cv2.erode(image, kernel, iterations = 1)
	blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
	edged = cv2.Canny(blurred, 30, 150)
	plt.imshow(edged)
	dilation = cv2.dilate(edged, kernel, iterations = 1)
	plt.imshow(dilation)
	# Written and read back so both operands of cv2.add come from imread.
	cv2.imwrite('process.png', dilation)
	img1 = cv2.imread('new.png')
	img2 = cv2.imread('process.png')
	img1 = cv2.bitwise_not(img1)
	final = cv2.add(img1, img2)

	final = cv2.bitwise_not(final)
	plt.imshow(final)
	# Morphological opening on the grayscale image.
	gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
	kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
	res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
	plt.imshow(res)
	cv2.imwrite('perfect.png', res)

	def mse(imgA, imgB):
	    """Return the mean squared error between two images of equal shape.

	    Restores the lost indentation and adds the missing parentheses so the
	    difference, not only ``imgB``, is squared.
	    """
	    diff = imgA.astype("float") - imgB.astype("float")
	    err = np.sum(diff ** 2)
	    err /= float(imgA.shape[0] * imgA.shape[1])
	    return err
	def findBorder(axis, color, leng, img):
	    """Scan lines of ``img`` from index ``axis`` and return a border index.

	    Columns are scanned when ``leng`` equals the image height, rows
	    otherwise. For ``color == [0, 0, 0]`` the first line holding a black
	    pixel is returned; for any other colour the first line made entirely
	    of that colour. Running past the image edge raises IndexError.

	    Restores the nesting lost in this copy and drops unused locals.
	    """
	    scanColumns = leng == img.shape[0]
	    wantBlack = color == [0, 0, 0]
	    current = axis
	    while True:
	        matches = 0
	        for offset in range(leng):
	            if scanColumns:
	                row, col = offset, current
	            else:
	                row, col = current, offset
	            if all(img.item(row, col, ch) == color[ch] for ch in range(3)):
	                if wantBlack:
	                    return current
	                matches += 1
	        if matches == leng:
	            return current
	        current += 1
	def splitWord(img, currentCol):
	    """Cut the next glyph group right of ``currentCol`` out of ``img``.

	    Returns ``(charImg, rightSide)``; ``rightSide`` is the first all-white
	    column after the glyph. Restores the nesting lost in this copy.
	    """
	    height, width = img.shape[:2]
	    # findBorder compares ``color`` against a list, so keep lists here.
	    black = [0, 0, 0]
	    white = [255, 255, 255]

	    leftSide = findBorder(currentCol + 1, black, height, img)
	    rightSide = findBorder(leftSide, white, height, img)
	    topSide = findBorder(0, black, width, img)
	    bottomSide = findBorder(topSide, white, width, img)

	    charImg = img[topSide:bottomSide, leftSide:rightSide]
	    return charImg, rightSide
	def rotateImage(image, angle):
	    """Return ``image`` rotated by ``angle`` degrees about its centre.

	    Size is preserved and uncovered areas are filled with white.
	    Restores the nesting lost in this copy.
	    """
	    (h, w) = image.shape[:2]
	    center = (w / 2, h / 2)
	    M = cv2.getRotationMatrix2D(center, angle, 1.0)
	    rotated = cv2.warpAffine(image, M, (w, h), borderMode = cv2.BORDER_CONSTANT, borderValue = (255, 255, 255))
	    return rotated
	def makeInnerBorder(img, left, right, top, bottom, color):
	    """Paint a border of ``color`` on the inside edges of ``img`` (in place).

	    Restores the lost nesting and makes ``right``/``bottom`` paint the far
	    edges instead of repainting the left/top edge. Widths are clamped to
	    the image size; the same array is returned.
	    """
	    height, width = img.shape[:2]
	    left = min(left, width)
	    right = min(right, width)
	    top = min(top, height)
	    bottom = min(bottom, height)

	    if left > 0:
	        img[:, :left] = color
	    if right > 0:
	        img[:, width - right:] = color
	    if top > 0:
	        img[:top, :] = color
	    if bottom > 0:
	        img[height - bottom:, :] = color
	    return img
	def verticalProjection(img, min_col=45):
	    """Return the column at which a glyph group should be cut.

	    Columns left of ``min_col`` are ignored; among the rest the left-most
	    column with the fewest black pixels is returned. Images with no column
	    at or beyond ``min_col`` yield ``width - 1`` (this used to raise
	    KeyError). Restores the nesting lost in this copy.
	    """
	    width = img.shape[1]
	    if width <= min_col:
	        return width - 1

	    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
	    counts = black.sum(axis=0)
	    # argmin returns the first (left-most) position of the minimum.
	    return min_col + int(np.argmin(counts[min_col:]))

	def findRectangleBorder(img):
	    """Crop ``img`` to the bounding box of its black pixels.

	    Restores the lost nesting and vectorises the search. The window is the
	    one the per-pixel loops produced: the last black row/column is
	    excluded, rows/columns 0 and 1 are ignored for the first black
	    row/column, and an image without black pixels gives an empty slice.
	    """
	    height, width = img.shape[:2]
	    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
	    blackCols = np.flatnonzero(black.any(axis=0))
	    blackRows = np.flatnonzero(black.any(axis=1))

	    lastCol = int(blackCols[-1]) if blackCols.size else 0
	    lastRow = int(blackRows[-1]) if blackRows.size else 0

	    # The original backward scans stopped at index 2.
	    innerCols = blackCols[blackCols >= 2]
	    innerRows = blackRows[blackRows >= 2]
	    firstCol = int(innerCols[0]) if innerCols.size else width - 1
	    firstRow = int(innerRows[0]) if innerRows.size else height - 1

	    return img[firstRow:lastRow, firstCol:lastCol]

	def elementaryRevise(img):
	    """Crop a glyph to its black pixels, add a 5 px white frame, resize to 60 x 80.

	    Restores the nesting lost in this copy.
	    """
	    img = findRectangleBorder(img)
	    img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	    img = cv2.resize(img, (60 ,80))
	    return img

	def countRate(file, threshold=0.05):
	    """Delete the image ``file`` when it holds too few black pixels.

	    The ratio is ``black_pixels / img.size`` (``img.size`` counts every
	    channel). Unreadable files are skipped instead of raising
	    AttributeError. Restores the nesting lost in this copy.
	    """
	    img = cv2.imread(file)
	    if img is None or img.size == 0:
	        return

	    black = (img[:, :, 0] == 0) & (img[:, :, 1] == 0) & (img[:, :, 2] == 0)
	    count = int(np.count_nonzero(black))
	    if (count / img.size) < threshold:
	        os.remove(file)

	def segmentation(img):
	    """Split the captcha ``img`` into glyph images written as ``<n>.png``.

	    Restores the nesting lost in this copy; the statements are unchanged.
	    Groups narrower than 200 px (after padding) are single glyphs; wider
	    groups are split at the column returned by ``verticalProjection``
	    after trying rotations of 1..20 degrees.
	    """
	    cntChar = 0
	    currentCol = 0
	    # Rotate by 180 degrees to locate the right-most text column.
	    rotateImg = rotateImage(img, 180)
	    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])

	    right = findBorder(0, [0,0,0], rotateImg.shape[0], rotateImg)
	    right = img.shape[1] - right + 2
	    while abs(currentCol - right) > 2:
	        image, currentCol = splitWord(img, currentCol)
	        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	        if image.shape[1] < 200:
	            image = elementaryRevise(image)
	            cv2.imwrite('{cntChar}.png'.format(**locals()), image)
	            cntChar += 1

	        else:
	            # Map each trial angle to its cut column.
	            cntDict = dict()
	            for i in range(1, 21):
	                rotateChar = rotateImage(image, i)
	                rotateChar = findRectangleBorder(rotateChar)

	                cntDict[i] = verticalProjection(rotateChar)
	            bestAngle = min(cntDict, key = cntDict.get)
	            rotatedImg = rotateImage(image, bestAngle)
	            rotatedImg = findRectangleBorder(rotatedImg)
	            min_cut = cntDict[bestAngle]
	            # Saved first: the rectangle below is drawn in place.
	            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(**locals()), rotatedImg)
	            new_cut = rotatedImg
	            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

	            new_cut = rotateImage(new_cut, 360 - bestAngle)
	            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	            new_cut = elementaryRevise(new_cut)
	            cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
	            cntChar += 1
	            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(**locals()))
	            remainImg = rotatedImg
	            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)

	            remainImg = rotateImage(remainImg, 360 - bestAngle)
	            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	            # Handle the remaining character(s).
	            if remainImg.shape[1] < 200:

	                remainImg = elementaryRevise(remainImg)
	                cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
	                cntChar += 1
	            while remainImg.shape[1] > 200:
	                rotateChar = rotateImage(remainImg, bestAngle)
	                rotateChar = findRectangleBorder(rotateChar)
	                cv2.imwrite('rotated_{bestAngle}.png'.format(**locals()), rotateChar)
	                cntDict[bestAngle] = verticalProjection(rotateChar)
	                min_cut = cntDict[bestAngle]
	                new_cut = rotateChar
	                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

	                new_cut = rotateImage(new_cut, 360 - bestAngle)
	                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

	                new_cut = elementaryRevise(new_cut)
	                cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
	                cntChar += 1
	                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(**locals()))
	                remainImg = rotatedImg
	                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)

	                remainImg = rotateImage(remainImg, 360 - bestAngle)
	                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

	                if remainImg.shape[1] < 200:
	                    remainImg = elementaryRevise(remainImg)
	                    cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
	                    cntChar += 1


	#print('rotated_{bestAngle}_degree.png'.format(**locals()), bestAngle, min_cut)






	#find the least number of text color of column
	#if cntNum < textColorNum:
	# textColorNum = cntNum




	# Restored nesting of the flattened copy; statements are unchanged.
	# Segment the pre-processed captcha (padded with a 30 px white frame).
	image = cv2.imread("perfect.png")

	image = cv2.copyMakeBorder(image, 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
	segmentation(image)

	# Drop near-empty PNGs and move single-digit glyph files into data_set.
	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        countRate(file)
	        if len(file) == 5 and os.path.isfile(file) :
	            shutil.move('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\' + file,'C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set\\' + file)

	dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')



	def trainData(img):
	    """Return the label of the stored sample closest to ``img`` (by ``mse``).

	    Restores the lost nesting, closes the connection, stops shadowing
	    ``min`` and returns None instead of raising UnboundLocalError when
	    table ``Data`` holds no decodable sample.
	    """
	    bestErr = float('inf')
	    bestLabel = None
	    conn = sqlite3.connect('dataset.sqlite')
	    try:
	        cur = conn.cursor()
	        cur.execute('SELECT img, label FROM Data')
	        for ablob, label in cur:
	            # Decode the stored PNG bytes in memory as a BGR image.
	            dataImg = cv2.imdecode(np.frombuffer(ablob, dtype=np.uint8), cv2.IMREAD_COLOR)
	            if dataImg is None:
	                continue
	            err = mse(dataImg, img)
	            if err < bestErr:
	                bestErr = err
	                bestLabel = label
	    finally:
	        conn.close()
	    return bestLabel

	def mse(img1, img2):
	    """Return the mean squared error between two images of equal shape.

	    Restores the nesting lost in this copy; the computation is unchanged.
	    """
	    err = np.sum((img1.astype('float') - img2.astype('float'))**2)
	    err /= float(img1.shape[0] * img1.shape[1])
	    return err

	# Interactive labelling with restored nesting: show each glyph, print the
	# guessed label and store the glyph when the user confirms with 'y'.
	for file in os.listdir(dir_path):
	    imgFile = cv2.imread(dir_path + "\\" + file)
	    if imgFile is None:
	        # Not a readable image; nothing to label.
	        continue
	    plt.imshow(imgFile)
	    plt.show()
	    char = trainData(imgFile)
	    print(char)
	    judge = input()
	    if judge == 'y':
	        with open(dir_path + "\\" + file, 'rb') as f:
	            ablob = f.read()
	        conn = sqlite3.connect('dataset.sqlite')
	        try:
	            cur = conn.cursor()
	            cur.execute('INSERT INTO Data(img, label) VALUES(?, ?)',
	                        (sqlite3.Binary(ablob), char.upper()))
	            conn.commit()
	        finally:
	            conn.close()

	# The glyph images are no longer needed once reviewed.
	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        os.remove(dir_path + "\\" + file)



	def productTestData():
	    """Ask the user for the label of every image in ``dir_path`` and store it.

	    Restores the lost nesting. Empty or whitespace-only answers skip the
	    image (``not target.isspace()`` let the empty string through), and the
	    file and connection are released even on errors.
	    """
	    for file in os.listdir(dir_path):
	        path = dir_path + "\\" + file
	        imgFile = cv2.imread(path)
	        if imgFile is None:
	            continue
	        plt.imshow(imgFile)
	        plt.show()
	        target = input("The target is ")
	        if not target.strip():
	            continue
	        with open(path, 'rb') as f:
	            ablob = f.read()
	        conn = sqlite3.connect('dataset.sqlite')
	        try:
	            cur = conn.cursor()
	            cur.execute('INSERT INTO Data(img, label) VALUES(?, ?)',
	                        (sqlite3.Binary(ablob), target.upper()))
	            conn.commit()
	        finally:
	            conn.close()





























	#sm = browser.find_element_by_name('ibnSubmit').click()