# Created October 17, 2019 20:03
# This code extracts the individual characters from the input image and outputs an image with the combined character regions.
import cv2
import matplotlib.pyplot as plt
from skimage import measure
# Ref to
import numpy as np
def iteration(image: np.ndarray, value: int) -> np.ndarray:
    """Fill short gaps between 0 pixels with 0, row by row.

    In every row, each stretch of non-zero pixels (255 in a binary image)
    that is bounded on both sides by a 0 pixel is overwritten with 0 when
    the column distance between the two bounding 0 pixels is at most
    ``value``.  Pixels before the first 0 and after the last 0 of a row are
    never modified, and rows without any 0 pixel are left untouched.

    Args:
        image: 2-D array.  It is modified in place.
        value: Largest column distance between two neighbouring 0 pixels
            for which the pixels in between are set to 0.

    Returns:
        The same array object that was passed in.

    Raises:
        ValueError: If ``image`` is not 2-D (the shape cannot be unpacked
            into rows and columns).
    """
    rows, _cols = image.shape  # deliberately fails for non-2-D input
    for row in range(rows):
        # Only the positions of the 0 pixels matter: a gap is filled when
        # two neighbouring 0 pixels are close enough to each other.
        zero_cols = np.flatnonzero(image[row] == 0)
        for left, right in zip(zero_cols[:-1], zero_cols[1:]):
            if right - left <= value:
                image[row, left:right] = 0
    return image
def rlsa(image: np.ndarray, value: int = 0, horizontal: bool = True) -> np.ndarray:
    """Apply the Run Length Smoothing Algorithm (RLSA) to a binary image.

    RLSA is used to extract blocks of text / regions of interest from a
    binary document image by closing short runs of 255 that lie between 0
    pixels.  The input array is copied first, so the caller's image is not
    modified.

    Args:
        image: Binary image as a 2-D ``np.ndarray``.
        value: Run-length threshold handed to ``iteration``.  Negative
            values are treated as 0.
        horizontal: If true, smooth along rows; otherwise smooth along
            columns (by smoothing the transposed image).

    Returns:
        The smoothed copy of ``image``, or ``None`` (after printing a hint)
        when ``image`` is not an ``np.ndarray`` or smoothing fails with an
        ``AttributeError`` / ``ValueError``.
    """
    # Validate before touching the object: calling ``.copy()`` first would
    # raise AttributeError for inputs such as ``None`` and never reach the
    # friendly message below.
    if not isinstance(image, np.ndarray):  # image must be binary of ndarray type
        print('Image must be an np ndarray and must be in binary')
        return None

    image = image.copy()
    # consecutive pixel position checker value to convert 255 to 0
    value = int(value) if value >= 0 else 0
    try:
        if horizontal:
            # RUN LENGTH SMOOTHING ALGORITHM working horizontally on the image
            image = iteration(image, value)
        else:
            # RUN LENGTH SMOOTHING ALGORITHM working vertically on the image
            # NOTE(review): the vertical pass is treated as the alternative
            # to the horizontal one (single direction flag) -- confirm.
            image = image.T
            image = iteration(image, value)
            image = image.T
    except (AttributeError, ValueError) as e:
        image = None
        print("ERROR: ", e, "\n")
        print('Image must be an np ndarray and must be in "binary". Use Opencv/PIL to convert the image to binary.\n')
        print("import cv2;\nimage=cv2.imread('path_of_the_image');\ngray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY);\n"
              "(thresh, image_binary) = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)\n")
        # Usage hint matches this function's (image, value, horizontal) signature.
        print("method usage -- rlsa(image_binary, 10, True)")
    return image
def obtain_contour(img):
    """Return the bounding box of the largest-area contour in a mask.

    Args:
        img: Array whose truthy entries mark the region of interest; it is
            cast to ``uint8`` and scaled to 0/255 before contour detection.
            (The caller in this file passes a boolean component mask.)

    Returns:
        ``[x, y, w, h]`` from ``cv2.boundingRect`` for the contour with the
        largest ``cv2.contourArea``.

    Raises:
        IndexError: If no contour is found.
    """
    mask = img.astype('uint8') * 255
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; indexing with [-2] selects the
    # contour list in every version.
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Ascending sort by area: the last element is the largest contour.
    contours = sorted(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(contours[-1])
    return [x, y, w, h]
def recombine_img(img_binary, img_color, run_length=40, min_area=10):
    """Stack the row bands of ``img_color`` that contain connected content.

    The binary image is smoothed horizontally with ``rlsa`` so neighbouring
    characters merge, the merged regions are labelled as connected
    components, and for every sufficiently large component the matching
    band of rows (with a one-pixel margin above and below) is cut from
    ``img_color``.  The bands are stacked vertically in label order.

    Args:
        img_binary: Binary image handed to ``rlsa``.
        img_color: Image the row bands are taken from; indexed by row.
        run_length: Run-length threshold passed to ``rlsa`` (default 40).
        min_area: Components whose pixel count is not greater than this
            value are ignored (default 10).

    Returns:
        ``np.vstack`` of the selected row bands of ``img_color``.

    Raises:
        ValueError: If no component exceeds ``min_area`` (``np.vstack``
            receives an empty list).
    """
    mask = rlsa(img_binary, run_length, True)
    # Invert so the smoothed (0-valued) regions become the labelled foreground.
    component_diagram = measure.label(255 - mask)
    component_areas = np.bincount(component_diagram.ravel())

    bands = []
    # The first element is the background class, so labels start at 1.
    for component_id, area in enumerate(component_areas[1:], start=1):
        if area <= min_area:
            continue
        _, y, _, h = obtain_contour(component_diagram == component_id)
        # Clamp the upper margin: for a component touching the top edge,
        # y - 1 would be -1 and the slice would wrap to the last row.
        top = max(y - 1, 0)
        bands.append(img_color[top:y + h + 1])
    return np.vstack(bands)
if __name__ == "__main__":
    # Demo: load an image, binarise it with Otsu's threshold, recombine the
    # detected content rows and display the input next to the result.
    img_path = '0_1.png'
    img_color = cv2.imread(img_path)
    if img_color is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError("Could not read image: " + img_path)
    img_gray = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    _, img_binary = cv2.threshold(img_gray, 0, 255, cv2.THRESH_OTSU)
    img_recombined = recombine_img(img_binary, img_color)

    # OpenCV loads BGR; reverse the channel axis for matplotlib (RGB).
    # Each image gets its own figure so the second does not overwrite the first.
    plt.figure()
    plt.imshow(img_color[..., ::-1])
    plt.figure()
    plt.imshow(img_recombined[..., ::-1])
    plt.show()
