Created
May 11, 2024 15:28
-
-
Save aso2101/d66252772a34a4f61b617e0d8f3b132a to your computer and use it in GitHub Desktop.
Separate text from commentary automatically in an image using OpenCV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import math
import os
import sys

import cv2
import numpy as np
# Parameters:
# xgive is the number of pixels of "give" when determining whether a line of text
# belongs to the main text or commentary, when they are discriminated based on
# width.
xgive = 70

# Output directories, relative to the working directory. They are created on
# demand because cv2.imwrite reports failure (returns False) instead of
# raising when the target directory does not exist.
BOXES_DIR = "bounding_boxes"
ROI_DIR = "cropped"


def text_line_boxes(img):
    """Return the (x, y, w, h) bounding boxes of horizontal text runs in *img*.

    *img* is a BGR image as returned by ``cv2.imread``.  The image is blurred,
    inverted and thresholded with Otsu's method, then dilated and eroded with
    a 1x20 (horizontal) kernel so that the glyphs of one line merge into a
    single blob.  One bounding box is returned per external contour.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (15, 15), 0)
    thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    kernel = np.ones((1, 20), np.uint8)  # note this is a horizontal kernel
    dilated = cv2.dilate(thresh, kernel, iterations=6)
    eroded = cv2.erode(dilated, kernel, iterations=6)
    merged = cv2.dilate(eroded, kernel, iterations=2)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
    # (image, contours, hierarchy) in 3.x; the contours are always second
    # from the end.
    contours = cv2.findContours(
        merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )[-2]
    return [cv2.boundingRect(cnt) for cnt in contours]


def commentary_start_y(boxes, give):
    """Return the y coordinate where the commentary begins, or None.

    *boxes* is an iterable of (x, y, w, h) tuples.  A box is "wide" when its
    width is within *give* pixels of the widest box.  Wide boxes whose y is
    smaller than the standard deviation of all wide-box y values are dropped
    as outliers; the smallest remaining y is returned.  ``None`` is returned
    when no box survives (including when *boxes* is empty).

    The result does not depend on the order of *boxes*.

    NOTE(review): the outlier test (y < std of the y values) is kept exactly
    as the script originally had it.  The original comment described it as
    discarding boxes "more than 1 standard deviation away from the nearest
    other bounding box", which is not what the comparison does -- confirm
    which behaviour is intended before changing it.
    """
    boxes = list(boxes)
    if not boxes:
        return None
    max_width = max(w for _, _, w, _ in boxes)
    wide_ys = [y for _, y, w, _ in boxes if w >= max_width - give]
    if not wide_ys:  # only possible with a negative *give*
        return None
    sd = np.std(wide_ys)
    kept = [y for y in wide_ys if y >= sd]
    return min(kept) if kept else None


def blank_above(img, start_y):
    """Return a copy of *img* with every row above *start_y* set to zero."""
    masked = img.copy()
    masked[:start_y] = 0
    return masked


def save_image(path, img):
    """Write *img* to *path*, creating the directory; return True on success."""
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    if cv2.imwrite(path, img):
        return True
    print(f"could not write {path}", file=sys.stderr)
    return False


def split_commentary(path, give):
    """Process one image file; return True if both outputs were written.

    Two files are produced, named after the input file's stem:

    * ``bounding_boxes/<stem>-new.tif`` -- the input with a green rectangle
      around every detected box at or below the commentary start;
    * ``cropped/<stem>-roi.tif`` -- the input with everything above the
      commentary start blacked out.

    When no commentary start is found, no rectangles are drawn and the
    cropped image is the unmodified input.  Files that OpenCV cannot read
    are skipped with a message on stderr.
    """
    img = cv2.imread(path)
    if img is None:  # cv2.imread signals failure by returning None
        print(f"skipping {path}: could not be read as an image",
              file=sys.stderr)
        return False

    stem = os.path.splitext(os.path.basename(path))[0]
    out = os.path.join(BOXES_DIR, stem + "-new.tif")
    roiname = os.path.join(ROI_DIR, stem + "-roi.tif")

    boxes = text_line_boxes(img)
    start = commentary_start_y(boxes, give)

    # Draw on a copy so the cropped output below stays free of annotations.
    annotated = img.copy()
    if start is not None:
        for x, y, w, h in boxes:
            if y >= start:
                cv2.rectangle(annotated, (x, y), (x + w, y + h),
                              (0, 255, 0), 2)

    masked = blank_above(img, 0 if start is None else start)
    # If necessary, further processing can be performed at this stage.
    roi_ok = save_image(roiname, masked)
    boxes_ok = save_image(out, annotated)
    return roi_ok and boxes_ok


# Process the images one at a time instead of loading them all up front, so
# memory use does not grow with the number of files.
for tif_path in glob.glob("*.tif"):
    split_commentary(tif_path, xgive)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment