Skip to content

Instantly share code, notes, and snippets.

@abdul-rehman-2050
Forked from yosemitebandit/classify.py
Created April 19, 2016 03:28
Show Gist options
  • Save abdul-rehman-2050/4b4b6ebe0915c3e7de8dc7c4c21307c5 to your computer and use it in GitHub Desktop.
Save abdul-rehman-2050/4b4b6ebe0915c3e7de8dc7c4c21307c5 to your computer and use it in GitHub Desktop.
OCR with OpenCV - KNN methods from Abid Rahman
''' classify
after training, classify a sample image with K-Nearest Neighbor
annotated from Abid Rahman's post: http://stackoverflow.com/a/9620295/232638
'''
import cv2
import numpy as np
# Classify the digits found in 'pi.png' with a 1-nearest-neighbour model
# trained on the samples/responses files saved by the training script.

# Load the data we generated previously.
# ndmin=2 keeps `samples` two-dimensional even if only one row was saved.
samples = np.loadtxt('general-samples.data', np.float32, ndmin=2)
responses = np.loadtxt('general-responses.data', np.float32)
responses = responses.reshape((responses.size, 1))

# train the KNN model (OpenCV 2.x API)
model = cv2.KNearest()
model.train(samples, responses)

# test the model with another image
image = cv2.imread('pi.png')
if image is None:
    # cv2.imread reports failure by returning None rather than raising
    raise IOError("could not read image 'pi.png'")
out = np.zeros(image.shape, np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Create black and white image: Gaussian adaptive threshold, inverted output,
# blockSize 11, C 2.  The named constants both equal the literal 1 that was
# passed here before.
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

# find contours
# NOTE(review): on OpenCV 2.x findContours modifies its input image, and
# `thresh` is sliced below; the training script does the same, so do not
# change one without the other -- confirm against the OpenCV version in use.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST,
                                       cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    if cv2.contourArea(contour) <= 50:
        # contour is too small to possibly be a number
        continue
    x, y, w, h = cv2.boundingRect(contour)
    if h <= 28:
        # sample is not tall enough to possibly be a number
        continue

    # draw the bounding box
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # select the sample, resize to 10x10 and then vectorize to 1x100 float32
    roi = thresh[y:y + h, x:x + w]
    roi_small = cv2.resize(roi, (10, 10))
    roi_small = np.float32(roi_small.reshape((1, 100)))

    # find nearest neighbor
    retval, results, neigh_resp, dists = model.find_nearest(roi_small, k=1)
    string = str(int(results[0][0]))

    # write the result onto the output image at the box's bottom-left corner
    cv2.putText(out, string, (x, y + h), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (0, 255, 0))

# show the results
cv2.imshow('im', image)
cv2.imshow('out', out)
cv2.waitKey(0)
''' train
open a training image of numbers, train.png
preprocess and find contours
for each contour, prompt user for input as to which number is being displayed
annotated from Abid Rahman's post: http://stackoverflow.com/a/9620295/232638
'''
import numpy as np
import cv2
# Interactively label the digit-like contours in 'train.png' and save the
# resulting feature vectors and labels for the classifier script.

# open training image for processing
image = cv2.imread('train.png')
if image is None:
    # cv2.imread reports failure by returning None rather than raising
    raise IOError("could not read image 'train.png'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)

# Create black and white image: Gaussian adaptive threshold, inverted output,
# blockSize 11, C 2.  The named constants both equal the literal 1 that was
# passed here before.
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

# find contours
# NOTE(review): on OpenCV 2.x findContours modifies its input image, and
# `thresh` is sliced below; the classifier script does the same, so do not
# change one without the other -- confirm against the OpenCV version in use.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST,
                                       cv2.CHAIN_APPROX_SIMPLE)

sample_rows = []
responses = []

# keyboard mappings for 0-9; user may type in this range when prompted
keys = range(ord('0'), ord('9') + 1)
ESC_KEY = 27

for contour in contours:
    if cv2.contourArea(contour) <= 50:
        # contour is too small to possibly be a number
        continue
    x, y, w, h = cv2.boundingRect(contour)
    if h <= 28:
        # not tall enough to possibly be a number
        continue

    # draw the bounding box on the image
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
    roi = thresh[y:y + h, x:x + w]
    roi_small = cv2.resize(roi, (10, 10))

    # Show image and wait for keypress.  Only the low byte is the key code;
    # some platforms set higher bits in waitKey's return value.
    cv2.imshow('norm', image)
    key = cv2.waitKey(0) & 0xFF

    if key == ESC_KEY:
        # Abort immediately without saving anything.  SystemExit is what
        # sys.exit() raises, and `sys` is not imported in this script.
        raise SystemExit
    if key in keys:
        # save pixel data as one 100-element row of 'samples'
        sample_rows.append(roi_small.reshape(100))
        # save input in 'responses'
        responses.append(int(chr(key)))
    # any other key skips this contour without labelling it

print("training complete")

# Build the sample matrix once instead of re-allocating it for every sample;
# float64 with shape (n, 100) matches what the incremental np.append produced.
samples = np.array(sample_rows, np.float64).reshape((len(sample_rows), 100))
np.savetxt('general-samples.data', samples)
responses = np.array(responses, np.float32)
responses = responses.reshape((responses.size, 1))
np.savetxt('general-responses.data', responses)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment