Created
October 17, 2019 20:03
-
-
Save lolipopshock/2d33c0f194a3a967bce33bc4aa7057b7 to your computer and use it in GitHub Desktop.
This code extracts the individual characters from the input image and outputs an image with the combined character regions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import matplotlib.pyplot as plt | |
from skimage import measure | |
#### | |
# Ref to https://github.com/Vasistareddy/python-rlsa/blob/master/rlsa.py | |
import numpy as np | |
def iteration(image: np.ndarray, value: int) -> np.ndarray:
    """
    Smooth short runs between 0 pixels in every row of a binary image.

    Within each row, the pixels lying between two neighbouring 0 pixels are
    overwritten with 0 when the distance between those two 0 pixels is at
    most "value". Pixels before the first 0 or after the last 0 of a row are
    left untouched. The array is modified in place and also returned.
    """
    height, _ = image.shape  # unpacking raises ValueError for non 2-D input
    for r in range(height):
        line = image[r]  # a view: writes below land in "image" itself
        # Filling a gap never touches the 0 pixels that delimit it, so their
        # column indices can be collected once per row.
        zero_cols = np.flatnonzero(line == 0)
        for left, right in zip(zero_cols[:-1], zero_cols[1:]):
            if right - left <= value:
                line[left:right] = 0
    return image
def rlsa(image: np.ndarray, value: int = 0, horizontal: bool = True) -> np.ndarray:
    """
    Apply the Run Length Smoothing Algorithm (RLSA) to a binary image.

    RLSA merges nearby 0 pixels into solid runs so that blocks of text or
    other regions of interest can be extracted from a binary document image.

    Parameters:
        image: 2-D binary image as an np.ndarray. The input is not modified;
            the smoothing is done on a copy.
        value: smoothing distance handed to ``iteration``; negative values
            are treated as 0.
        horizontal: smooth along rows when True, along columns otherwise.

    Returns:
        The smoothed image, or None (after printing a hint) when "image" is
        not an np.ndarray or cannot be processed.
    """
    # Validate before touching the object: calling .copy() first would raise
    # AttributeError for inputs such as None instead of reporting the problem.
    if not isinstance(image, np.ndarray):
        print('Image must be an np ndarray and must be in binary')
        return None

    image = image.copy()
    value = int(value) if value >= 0 else 0  # clamp negative distances to 0
    try:
        if horizontal:
            image = iteration(image, value)
        else:
            # Smoothing the transposed view row-wise smooths the columns.
            image = iteration(image.T, value).T
    except (AttributeError, ValueError) as e:
        image = None
        print("ERROR: ", e, "\n")
        print('Image must be an np ndarray and must be in "binary". Use Opencv/PIL to convert the image to binary.\n')
        print(
            "import cv2;\n"
            "image=cv2.imread('path_of_the_image');\n"
            "gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY);\n"
            "(thresh, image_binary) = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)\n"
        )
        # Usage hint matches this function's (image, value, horizontal) signature.
        print("method usage -- rlsa(image_binary, 10, True)")
    return image
#### | |
def obtain_contour(img):
    """
    Return the bounding box of the largest contour found in a mask.

    Parameters:
        img: array mask; it is cast to uint8 and multiplied by 255 before
            contour detection (assumes a boolean or 0/1 mask -- TODO confirm
            against callers).

    Returns:
        [x, y, w, h] of the bounding rectangle of the contour with the
        largest area.

    Raises:
        IndexError: if no contour is found in the mask.
    """
    img = img.astype('uint8') * 255
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the last two items are the same on all of them.
    contours, _hierarchy = cv2.findContours(
        img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    if len(contours) == 0:
        raise IndexError("no contours found in the given mask")
    # Scanning in reverse makes the last contour win among equal areas, the
    # same pick as sorting by area and taking the final element.
    largest = max(reversed(contours), key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return [x, y, w, h]
def recombine_img(img_binary, img_color, smooth_value=40, min_area=10):
    """
    Stack the horizontal strips of a colour image that contain content.

    The binary image is smoothed horizontally with ``rlsa``, the smoothed
    regions are labelled as connected components, and for every component
    larger than "min_area" pixels the full-width rows covering it (padded by
    one row above and below) are cut from "img_color". The strips are then
    stacked vertically in component-label order.

    Parameters:
        img_binary: binary image passed to ``rlsa``.
        img_color: image the strips are cut from; rows are sliced along its
            first axis.
        smooth_value: smoothing distance passed to ``rlsa`` (default 40).
        min_area: components with at most this many pixels are ignored
            (default 10).

    Returns:
        np.ndarray with the selected strips stacked on top of each other.

    Raises:
        ValueError: if no component exceeds "min_area".
    """
    mask = rlsa(img_binary, smooth_value, True)
    component_diagram = measure.label(255 - mask)
    # Index 0 of the counts is the background class, hence the [1:] below.
    component_areas = np.bincount(component_diagram.ravel())

    strips = []
    for component_id, area in enumerate(component_areas[1:], start=1):
        if area <= min_area:
            continue
        content_region = component_diagram == component_id
        _, y, _, h = obtain_contour(content_region)
        # Clamp the padded start at 0: a start of -1 would be read as "last
        # row" and silently yield an empty strip for components at the top.
        top = max(y - 1, 0)
        strips.append(img_color[top:y + h + 1])

    if not strips:
        raise ValueError("no region larger than min_area pixels was found")
    return np.vstack(strips)
if __name__ == "__main__":
    # Demo: show the input image next to its recombined character regions.
    image_path = '0_1.png'
    img_color = cv2.imread(image_path)
    if img_color is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise SystemExit("Could not read image: {}".format(image_path))

    img_gray = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)
    _, img_binary = cv2.threshold(img_gray, 0, 255, cv2.THRESH_OTSU)
    img_recombined = recombine_img(img_binary, img_color)

    # The image was converted with COLOR_BGR2GRAY above, i.e. it is BGR;
    # reverse the channel axis so matplotlib receives RGB.
    plt.subplot(121)
    plt.imshow(img_color[..., ::-1])
    plt.subplot(122)
    plt.imshow(img_recombined[..., ::-1])
    # Without show() nothing is displayed when run as a plain script.
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment