Created
October 4, 2014 21:11
-
-
Save trhura/c2219325042382a20dee to your computer and use it in GitHub Desktop.
Python script which scans Ooredoo top-up cards and extracts the top-up code using OpenCV + Tesseract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python2 | |
import cv2 | |
import numpy as np | |
from collections import deque | |
# Pipeline state shared between the step functions below; each step reads its
# predecessor's output from these module-level variables and writes its own.
orig_image = None       # BGR image loaded from disk in main()
gray_image = None       # written by gray_step()
threshold_image = None  # written by threshold_step()
edge_image = None       # written by edge_step()
def gray_step():
    """Convert the loaded BGR image to a single-channel grayscale image.

    Reads the module-level ``orig_image`` and stores the result in the
    module-level ``gray_image`` for the later pipeline steps.
    """
    # Only gray_image is assigned here; orig_image is merely read, so it does
    # not need a global declaration (the original declared both).
    global gray_image
    gray_image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2GRAY)
    return gray_image
def threshold_step():
    """Binarize the grayscale image: pixels brighter than 127 become 255."""
    global threshold_image
    cutoff = 127
    white = 255
    # cv2.threshold returns (retval, image); keep only the image.
    threshold_image = cv2.threshold(gray_image, cutoff, white,
                                    cv2.THRESH_BINARY)[1]
    return threshold_image
def edge_step():
    """Outline the white regions of the card.

    An adaptive threshold picks up local intensity transitions, the result is
    restricted to the white areas found by threshold_step(), and a small
    erosion removes speckle noise.  Stores and returns the module-level
    ``edge_image``.
    """
    # (A commented-out cv2.Canny experiment was removed here.)
    global edge_image
    edge_image = cv2.adaptiveThreshold(gray_image, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)
    # Keep only edges that fall inside the thresholded white regions.
    edge_image = cv2.bitwise_and(edge_image, edge_image, mask=threshold_image)
    # 5x5 erosion shrinks thin speckle while keeping the card outline.
    kernel = np.ones((5, 5), np.uint8)
    edge_image = cv2.erode(edge_image, kernel)
    return edge_image
def mask_image():
    """ Find the biggest white blob in the image """
    # NOTE(review): this rebinds the module-level name `mask_image` from this
    # function object to an ndarray on the first call -- after that the
    # function itself is unreachable.  find_topup_region() reads the rebound
    # array, so the shadowing is load-bearing; do not rename without also
    # updating that consumer.
    global mask_image
    # OpenCV 2.x API: findContours returns (contours, hierarchy).
    contours, _ = cv2.findContours(edge_image,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
    height, width = edge_image.shape[:2]
    # Start from an all-black mask the same size as the edge image.
    mask_image = np.zeros(edge_image.shape,np.uint8)
    area = height * width
    for cnt in contours:
        # Keep only blobs covering more than 1/5 of the frame; paint their
        # bounding rectangles solid white (thickness -1) into the mask.
        if (area/5) <cv2.contourArea(cnt):
            x,y,w,h = cv2.boundingRect(cnt)
            cv2.rectangle(mask_image,(x,y),(x+w,y+h),255, -1)
    return mask_image
def find_topup_region():
    """Crop the gray top-up code strip out of the card image.

    Selects mid-gray pixels (intensity 60-105) inside the card mask, takes
    the largest resulting contour, and returns an eroded grayscale crop of
    its bounding rectangle.

    Raises:
        ValueError: if no candidate gray region is found.
    """
    topup_region = cv2.inRange(gray_image, 60, 105)
    topup_region = cv2.bitwise_and(topup_region, topup_region, mask=mask_image)
    contours, _ = cv2.findContours(topup_region, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Previously this fell through to cv2.boundingRect(None) and crashed
        # with a cryptic OpenCV error; fail with a clear message instead.
        raise ValueError("no top-up region found in image")
    maxcnt = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(maxcnt)
    result = gray_image[y:y+h, x:x+w]
    # NOTE(review): grayscale erosion expands dark regions -- presumably to
    # thicken the digit strokes for tesseract; confirm against sample cards.
    kernel = np.ones((5, 5), np.uint8)
    result = cv2.erode(result, kernel)
    return result
def main():
    """Interactively run the scan pipeline on '1.jpg'.

    Pressing 'n' advances to the next processing step and shows its output;
    once the steps are exhausted, the cropped region is OCR'd with
    pytesseract and the formatted digits are drawn onto the original image.
    Pressing 'q' quits the loop.
    """
    global orig_image
    orig_image = cv2.imread('1.jpg')
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.imshow('Image',orig_image)
    # Steps are consumed left-to-right, one per 'n' keypress; note that
    # mask_image here is the function, rebound to its result when it runs.
    steps = deque([gray_step, threshold_step, edge_step, mask_image, find_topup_region])
    current_image = orig_image
    while True:
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        if key & 0xFF == ord('n'):
            try:
                next_step = steps.popleft()
                current_image = next_step()
                cv2.imshow('Image',current_image)
            except IndexError:
                # Deque exhausted: all steps are done -- OCR the final crop.
                height, width = current_image.shape[:2]
                # Scale to a fixed 300px width, preserving aspect ratio.
                ratio = (float(300)/width)
                print width, height, ratio, ratio*height
                result = cv2.resize(current_image, (300, int(ratio*height)))
                cv2.imshow('Image', result)
                cv2.imwrite('roi.png', result)
                # Deferred import -- presumably so the GUI loop works even
                # when the OCR dependencies are not installed; confirm.
                import pytesseract, Image, string
                img = Image.open('roi.png')
                ocr_result = pytesseract.image_to_string(img)
                ocr_result = "".join(c for c in ocr_result if c in string.digits) # only numbers
                # Re-group the extracted digits into blocks of four.
                ocr_result = " ".join(ocr_result[i*4:i*4+4] for i, _ in enumerate(ocr_result[::4]))
                cv2.putText(orig_image, ocr_result, (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 255), 5)
                cv2.imshow('Image', orig_image)
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment