Skip to content

Instantly share code, notes, and snippets.

Last active December 28, 2019 08:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AdroitAnandAI/914ca53e55ff15837ece08597d034a05 to your computer and use it in GitHub Desktop.
Save AdroitAnandAI/914ca53e55ff15837ece08597d034a05 to your computer and use it in GitHub Desktop.
Text Inversion with Shape Context
import numpy as np
import cv2
import sys
from scipy.spatial.distance import cdist, cosine
from shape_context import ShapeContext
import matplotlib.pyplot as plt
import imutils
# Reference image whose glyph descriptors form the matching base
# (passed as `baseImage` to findUpright in main).
# NOTE(review): match() maps index 10 to "/", so this image presumably shows
# the glyphs 0-9 followed by "/" from left to right -- confirm against the file.
numberImage = 'images/numbers.png'
# The two candidate images compared against the reference. Despite the names,
# findUpright decides which of the two is actually upright.
uprightImage = 'images/numbers_test4.png'
invertedImage = 'images/numbers_test4_inverted.png'
# Shared descriptor extractor, handed to parse_nums by findUpright.
sc = ShapeContext()
def get_contour_bounding_rectangles(gray):
    """Return the bounding box of every external contour in an image.

    Args:
        gray: Image handed straight to ``cv2.findContours`` (the caller in
            this file passes a thresholded, dilated grayscale image).

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples, one per contour, where
        ``(x1, y1)`` is the top-left corner reported by ``cv2.boundingRect``
        and ``(x2, y2)`` is ``(x1 + width, y1 + height)``. The order is
        whatever ``cv2.findContours`` produced.
    """
    found = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy). Indexing [1] unconditionally would iterate over
    # the hierarchy array on 4.x, so select the contour list by tuple length.
    contours = found[0] if len(found) == 2 else found[1]

    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        boxes.append((x, y, x + w, y + h))
    return boxes
def parse_nums(sc, path):
    """Compute one shape-context descriptor per glyph found in an image.

    The image is read as grayscale, inverted, thresholded, and dilated; the
    bounding box of each external contour is then cropped and passed to the
    descriptor extractor.

    Args:
        sc: Object providing ``get_points_from_img(image, n)`` and
            ``compute(points)`` (a ``ShapeContext`` instance in this file).
        path: Path of the image file to read.

    Returns:
        ``np.ndarray`` holding one flattened descriptor per bounding box,
        ordered left to right by the box's ``x1`` coordinate. Empty when no
        contour is found.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``path``.
    """
    img = cv2.imread(path, 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("could not read image: " + str(path))

    # invert image colors
    img = cv2.bitwise_not(img)
    # Only pixels that are exactly 255 after inversion (pure black in the
    # input) exceed the 254 threshold and are kept as foreground.
    _, img = cv2.threshold(img, 254, 255, cv2.THRESH_BINARY)
    # making numbers fat for better contour detection
    kernel = np.ones((2, 2), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    print('After thresholding and dilation...')

    # getting our numbers one by one
    rois = get_contour_bounding_rectangles(img)
    print('After greying and bounding...')

    # we are getting contours in different order so we need to sort them by x1
    nums = sorted(rois, key=lambda box: box[0])
    print('bounding box x coords')

    descs = []
    for x1, y1, x2, y2 in nums:
        # The meaning of the second argument (15) is defined by ShapeContext.
        points = sc.get_points_from_img(img[y1:y2, x1:x2], 15)
        # Without this append the function would always return an empty array.
        descs.append(sc.compute(points).flatten())
    return np.array(descs)
def match(base, current):
    """Find the base descriptor closest to ``current`` by cosine distance.

    Here we are using cosine diff instead of "by paper" diff, cause it's
    faster.

    Args:
        base: 2-D array with one descriptor per row.
        current: 1-D descriptor with the same length as a row of ``base``.

    Returns:
        A ``(char, distance)`` tuple. ``char`` is the index of the closest
        row as a string, except that index 10 is reported as ``"/"``.
        ``distance`` is the cosine distance to that row.
    """
    query = current.reshape((1, current.shape[0]))
    # ravel() instead of reshape(11): works for any number of base rows.
    distances = cdist(base, query, metric="cosine").ravel()
    best = int(np.argmin(distances))
    char = "/" if best == 10 else str(best)
    return char, distances[best]
def _total_match_cost(base, descriptors):
    """Sum the best-match cosine distance of each descriptor against ``base``."""
    return sum(match(base, descriptor)[1] for descriptor in descriptors)


def findUpright(baseImage, firstImg, secondImg):
    """Decide which of two images is upright by matching against a reference.

    Descriptors are extracted from all three images with ``parse_nums``. Each
    candidate is scored by summing the cosine distance of every glyph to its
    closest reference glyph; the candidate with the lower total is reported
    as upright.

    Args:
        baseImage: Path of the reference image.
        firstImg: Path of the first candidate image.
        secondImg: Path of the second candidate image.

    Returns:
        ``(upright, inverted)``: the two candidate paths, best match first.
        On a tie ``firstImg`` is returned as upright.
    """
    base_0123456789 = parse_nums(sc, baseImage)
    recognize = parse_nums(sc, firstImg)
    recognize_inverted = parse_nums(sc, secondImg)

    # Totals are plain sums, not averages, so a candidate with fewer detected
    # glyphs accumulates less distance.
    matchFactor = _total_match_cost(base_0123456789, recognize)
    matchFactorInv = _total_match_cost(base_0123456789, recognize_inverted)

    print("\nUpright Text Match Value = " + str(matchFactor))
    print("Flip Text Match Value = " + str(matchFactorInv))

    # A larger total distance means a worse match.
    if matchFactor > matchFactorInv:
        return secondImg, firstImg
    return firstImg, secondImg
def main():
    """Determine which test image is upright and print both paths."""
    upright_path, inverted_path = findUpright(
        numberImage, uprightImage, invertedImage
    )
    report = (
        "\n\nThis is the upright Image: " + upright_path,
        "This is the inverted Image: " + inverted_path,
    )
    for line in report:
        print(line)
    # The upright image is loaded here, but nothing in this function uses it.
    img = cv2.imread(upright_path)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment