Skip to content

Instantly share code, notes, and snippets.

@AmalJossy
Last active October 14, 2018 18:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AmalJossy/9f102014b40c832b59f7e40027badba1 to your computer and use it in GitHub Desktop.
Save AmalJossy/9f102014b40c832b59f7e40027badba1 to your computer and use it in GitHub Desktop.
Character segmentation of a given text image
import cv2
import numpy as np
# Character segmentation of a text image.
#
# Pipeline: grayscale -> inverted binary threshold -> dilate with a wide
# kernel so each text line merges into one blob -> per line, dilate with a
# narrower kernel to get word blobs -> per word, take the external contours
# of the un-dilated binary image as characters and box them on the original.
# Each intermediate stage is shown in a window; press any key to advance.

IMAGE_PATH = '1.png'
BINARY_THRESHOLD = 192  # gray values above this become background (0)
# np.ones((rows, cols)): 5 px tall, 50 px wide -> merges glyphs along a line.
# A (5, 5) kernel here would merge at word level instead.
LINE_KERNEL = np.ones((5, 50), np.uint8)
WORD_KERNEL = np.ones((5, 10), np.uint8)
BOX_COLOR = (90, 0, 255)  # BGR
BOX_THICKNESS = 2


def _find_external_contours(binary):
    """Return the outermost contours of a single-channel binary image.

    cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    (contours, hierarchy) on 2.x and 4.x; the contour list is always the
    second-to-last element, so index -2 works on every version. The input
    is copied because older OpenCV releases modify the source image.
    """
    return cv2.findContours(
        binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]


# import image
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread signals a missing/undecodable file by returning None
    # instead of raising; fail here with a clear message.
    raise IOError('could not read image: ' + IMAGE_PATH)

# grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)
cv2.waitKey(0)

# binary (inverted: pixels at or below the threshold become 255)
thresh = cv2.threshold(
    gray, BINARY_THRESHOLD, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow('binarised', thresh)
cv2.waitKey(0)

# line dilation
img_dilation = cv2.dilate(thresh, LINE_KERNEL, iterations=1)
cv2.imshow('dilated', img_dilation)
cv2.waitKey(0)

# find line contours and sort them by the top edge of their bounding box
line_ctrs = _find_external_contours(img_dilation)
sorted_ctrs = sorted(line_ctrs, key=lambda ctr: cv2.boundingRect(ctr)[1])

for line_ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(line_ctr)

    # roi is a view into image, so boxes drawn on it (or on sub-views of
    # it) appear in the final 'marked areas' window.
    roi = image[y:y + h, x:x + w]

    # Keep only the foreground pixels that fall inside this line's blob,
    # dropping anything else that happens to share its bounding box.
    mask = np.zeros_like(thresh)
    cv2.drawContours(mask, [line_ctr], 0, 255, -1)  # -1 = filled
    roi_thresh = cv2.bitwise_and(thresh[y:y + h, x:x + w],
                                 mask[y:y + h, x:x + w])

    # word blobs within the line, sorted by the left edge of their box
    word_dilation = cv2.dilate(roi_thresh, WORD_KERNEL, iterations=1)
    word_ctrs = sorted(_find_external_contours(word_dilation),
                       key=lambda ctr: cv2.boundingRect(ctr)[0])

    for word_ctr in word_ctrs:
        xw, yw, ww, hw = cv2.boundingRect(word_ctr)
        word_roi = roi[yw:yw + hw, xw:xw + ww]
        word_thresh = roi_thresh[yw:yw + hw, xw:xw + ww]

        # characters: external contours of the un-dilated word region
        char_ctrs = sorted(_find_external_contours(word_thresh),
                           key=lambda ctr: cv2.boundingRect(ctr)[0])

        for char_ctr in char_ctrs:
            xc, yc, wc, hc = cv2.boundingRect(char_ctr)
            cv2.rectangle(word_roi, (xc, yc), (xc + wc, yc + hc),
                          BOX_COLOR, BOX_THICKNESS)

cv2.imshow('marked areas', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment