-
-
Save abdul-rehman-2050/4b4b6ebe0915c3e7de8dc7c4c21307c5 to your computer and use it in GitHub Desktop.
OCR with OpenCV - KNN methods from Abid Rahman
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' classify
after training, classify a sample image with K-Nearest Neighbor
annotated from Abid Rahman's post: http://stackoverflow.com/a/9620295/232638

Loads 'general-samples.data' and 'general-responses.data', trains a KNN model
on them, then labels every sufficiently large contour found in 'pi.png'.
Two windows are shown: 'im' (input with bounding boxes) and 'out' (the
recognised digits drawn at the matching positions).

NOTE: cv2.KNearest / find_nearest are the OpenCV 2.4 Python names; newer
OpenCV releases expose KNN through the cv2.ml module instead.
'''
import cv2
import numpy as np

# load the data we generated previously
samples = np.loadtxt('general-samples.data', np.float32)
# a file holding a single sample loads as a 1-D vector; force N x 100 rows
samples = samples.reshape((-1, 100))
responses = np.loadtxt('general-responses.data', np.float32)
responses = responses.reshape((responses.size, 1))

# train the KNN model
model = cv2.KNearest()
model.train(samples, responses)

# test the model with another image
image = cv2.imread('pi.png')
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise IOError("could not read image 'pi.png'")
out = np.zeros(image.shape, np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# create black and white image (inverted, so the ink becomes white)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

# find contours
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST,
                                       cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    if cv2.contourArea(contour) <= 50:
        # contour is too small to possibly be a number
        continue
    x, y, w, h = cv2.boundingRect(contour)
    if h <= 28:
        # sample is not tall enough to possibly be a number
        continue

    # draw the bounding box
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # select the sample, resize to 10x10 and then vectorize
    roi = thresh[y:y + h, x:x + w]
    roi_small = cv2.resize(roi, (10, 10))
    roi_small = np.float32(roi_small.reshape((1, 100)))

    # find nearest neighbor
    retval, results, neigh_resp, dists = model.find_nearest(roi_small, k=1)
    label = str(int(results[0][0]))

    # write the result to the output image
    cv2.putText(out, label, (x, y + h), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (0, 255, 0))

# show the results
cv2.imshow('im', image)
cv2.imshow('out', out)
cv2.waitKey(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' train
open a training image of numbers, train.png
preprocess and find contours
for each contour, prompt user for input as to which number is being displayed
annotated from Abid Rahman's post: http://stackoverflow.com/a/9620295/232638

Pressing a digit key (0-9) records the highlighted contour with that label,
Esc aborts without saving, any other key skips the contour. The results are
written to 'general-samples.data' and 'general-responses.data'.
'''
import sys

import numpy as np
import cv2

# open training image for processing
image = cv2.imread('train.png')
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise IOError("could not read image 'train.png'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)

# create black and white image (inverted, so the ink becomes white)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

# find contours
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST,
                                       cv2.CHAIN_APPROX_SIMPLE)

samples = np.empty((0, 100))
responses = []

# keyboard mappings for 0-9; user may type in this range when prompted
keys = list(range(ord('0'), ord('9') + 1))
ESC_KEY = 27

for contour in contours:
    if cv2.contourArea(contour) <= 50:
        # contour is too small to possibly be a number
        continue
    x, y, w, h = cv2.boundingRect(contour)
    if h <= 28:
        # not tall enough to possibly be a number
        continue

    # draw the bounding box on the image
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
    roi = thresh[y:y + h, x:x + w]
    roi_small = cv2.resize(roi, (10, 10))

    # show image and wait for keypress; mask to the low byte because some
    # builds return the key code with extra modifier bits set
    cv2.imshow('norm', image)
    key = cv2.waitKey(0) & 0xFF

    if key == ESC_KEY:
        # abort: nothing collected so far is written to disk
        sys.exit()
    elif key in keys:
        # save pixel data in 1x100 matrix of 'samples'
        sample = roi_small.reshape((1, 100))
        samples = np.append(samples, sample, 0)
        # save input in 'responses'
        responses.append(int(chr(key)))

print("training complete")

np.savetxt('general-samples.data', samples)
responses = np.array(responses, np.float32)
responses = responses.reshape((responses.size, 1))
np.savetxt('general-responses.data', responses)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment