Skip to content

Instantly share code, notes, and snippets.

@lpillmann
Last active January 1, 2022 00:06
Show Gist options
  • Save lpillmann/d76eb4f4eea0320bb35dcd1b2a4575ee to your computer and use it in GitHub Desktop.
Save lpillmann/d76eb4f4eea0320bb35dcd1b2a4575ee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
'''Crop an image to just the portions containing text.
Usage:
./crop_morphology.py path/to/image.jpg
This will place the cropped image in path/to/image.crop.png.
For details on the methodology, see
http://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html
Script created by Dan Vanderkam (https://github.com/danvk)
Adapted to Python 3 by Lui Pillmann (https://github.com/luipillmann)
'''
import glob
import os
import random
import sys
import random
import math
import json
from collections import defaultdict
import cv2
from PIL import Image, ImageDraw
import numpy as np
from scipy.ndimage.filters import rank_filter
def dilate(ary, N, iterations):
    """Dilate `ary` with an NxN '+' shape, applied as a row then a column pass.

    `ary` is expected to be np.uint8 with values 0/255; it is divided by 255
    before dilation, so the returned array holds the dilated, rescaled values.
    """
    mid = (N - 1) // 2  # integer division: the index must be an int

    # Horizontal bar of the '+' sign.
    row_kernel = np.zeros((N, N), dtype=np.uint8)
    row_kernel[mid, :] = 1

    # Vertical bar of the '+' sign.
    col_kernel = np.zeros((N, N), dtype=np.uint8)
    col_kernel[:, mid] = 1

    result = ary / 255
    for kernel in (row_kernel, col_kernel):
        result = cv2.dilate(result, kernel, iterations=iterations)
    return result
def props_for_contours(contours, ary):
    """Describe each contour by its bounding box and its count of set pixels.

    Returns a list of dicts with keys 'x1', 'y1', 'x2', 'y2' (inclusive
    bounding-box corners) and 'sum' (the sum of `ary` inside the filled
    contour, divided by 255).
    """
    described = []
    for contour in contours:
        left, top, width, height = cv2.boundingRect(contour)

        # Rasterise the filled contour so it can be used as a pixel mask.
        mask = np.zeros(ary.shape)
        cv2.drawContours(mask, [contour], 0, 255, -1)
        inside = mask > 0

        described.append({
            'x1': left,
            'y1': top,
            'x2': left + width - 1,
            'y2': top + height - 1,
            'sum': np.sum(ary * inside) / 255,
        })
    return described
def union_crops(crop1, crop2):
    """Return the smallest (x1, y1, x2, y2) rect that contains both rects."""
    (a_left, a_top, a_right, a_bottom) = crop1
    (b_left, b_top, b_right, b_bottom) = crop2
    left = min(a_left, b_left)
    top = min(a_top, b_top)
    right = max(a_right, b_right)
    bottom = max(a_bottom, b_bottom)
    return left, top, right, bottom
def intersect_crops(crop1, crop2):
    """Return the overlap of two (x1, y1, x2, y2) rects.

    When the rects do not overlap the result has x2 < x1 and/or y2 < y1.
    """
    (a_left, a_top, a_right, a_bottom) = crop1
    (b_left, b_top, b_right, b_bottom) = crop2
    left = max(a_left, b_left)
    top = max(a_top, b_top)
    right = min(a_right, b_right)
    bottom = min(a_bottom, b_bottom)
    return left, top, right, bottom
def crop_area(crop):
    """Return the area of an (x1, y1, x2, y2) rect; inverted rects count as 0."""
    left, top, right, bottom = crop
    width = max(0, right - left)
    height = max(0, bottom - top)
    return width * height
def find_border_components(contours, ary):
    """Find contours whose bounding box covers more than half of `ary`.

    Returns a list of (index, x1, y1, x2, y2) tuples, where index is the
    contour's position in `contours` and the corners are inclusive.
    """
    min_area = 0.5 * (ary.shape[0] * ary.shape[1])
    found = []
    for index, contour in enumerate(contours):
        left, top, width, height = cv2.boundingRect(contour)
        if width * height <= min_area:
            continue
        found.append((index, left, top, left + width - 1, top + height - 1))
    return found
def angle_from_right(deg):
    """Return how far `deg` is from the nearest multiple of 90 degrees."""
    remainder = deg % 90
    return min(remainder, 90 - remainder)
def remove_border(contour, ary):
    """Remove everything outside a border contour.

    Builds a mask that is 255 inside the border and 0 outside it (and along
    a 4px-wide outline of the border itself), then returns the element-wise
    minimum of that mask and `ary`.
    """
    # Use a rotated rectangle (should be a good approximation of a border).
    # If it's far from a right angle, it's probably two sides of a border and
    # we should use the bounding box instead.
    c_im = np.zeros(ary.shape)
    r = cv2.minAreaRect(contour)
    degs = r[2]
    if angle_from_right(degs) <= 10.0:
        box = cv2.boxPoints(r)
        # np.int0 was an alias of np.intp and was removed in NumPy 2.0.
        box = box.astype(np.intp)
        cv2.drawContours(c_im, [box], 0, 255, -1)
        cv2.drawContours(c_im, [box], 0, 0, 4)
    else:
        # boundingRect returns (x, y, width, height), not two corners, so the
        # opposite corner has to be derived before drawing.
        x, y, w, h = cv2.boundingRect(contour)
        top_left = (x, y)
        bottom_right = (x + w - 1, y + h - 1)
        cv2.rectangle(c_im, top_left, bottom_right, 255, -1)
        cv2.rectangle(c_im, top_left, bottom_right, 0, 4)
    return np.minimum(c_im, ary)
def find_components(edges, max_components=16):
    """Dilate the image until there are just a few connected components.

    Dilation starts at 2 iterations of a 3x3 '+' kernel and increases by one
    iteration per pass until at most `max_components` contours remain.

    Returns the contours for these components.
    Raises ValueError if `max_components` is less than 1, since a non-empty
    image could then never satisfy the stopping condition.
    """
    if max_components < 1:
        raise ValueError('max_components must be at least 1')

    # Perform increasingly aggressive dilation until there are just a few
    # connected components.
    n = 1
    while True:
        n += 1
        dilated_image = np.uint8(dilate(edges, N=3, iterations=n))
        # findContours returns (image, contours, hierarchy) in OpenCV 3 but
        # (contours, hierarchy) in OpenCV 2 and 4; the contours are always
        # the second-to-last element.
        contours = cv2.findContours(
            dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) <= max_components:
            return contours
def find_optimal_components_subset(contours, edges):
    """Greedily pick contours whose union balances coverage and compactness.

    Starts from the contour with the most set pixels and keeps merging in
    further contours while doing so helps (see the acceptance rule below).

    Returns an (x1, y1, x2, y2) tuple.
    """
    candidates = props_for_contours(contours, edges)
    candidates.sort(key=lambda info: -info['sum'])
    total = np.sum(edges) / 255
    area = edges.shape[0] * edges.shape[1]

    # Seed the crop with the heaviest contour.
    seed = candidates[0]
    del candidates[0]
    crop = seed['x1'], seed['y1'], seed['x2'], seed['y2']
    covered_sum = seed['sum']

    while covered_sum < total:
        # Score of the current crop: recall is the fraction of set pixels
        # covered, "precision" is the fraction of the image left outside.
        recall = 1.0 * covered_sum / total
        prec = 1 - 1.0 * crop_area(crop) / area
        f1 = 2 * (prec * recall / (prec + recall))

        for i, info in enumerate(candidates):
            candidate_box = info['x1'], info['y1'], info['x2'], info['y2']
            new_crop = union_crops(crop, candidate_box)
            new_sum = covered_sum + info['sum']
            new_recall = 1.0 * new_sum / total
            new_prec = 1 - 1.0 * crop_area(new_crop) / area
            new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall)

            # Add this crop if it improves f1 score,
            # _or_ it adds 25% of the remaining pixels for <15% crop expansion.
            # ^^^ very ad-hoc! make this smoother
            remaining_frac = info['sum'] / (total - covered_sum)
            new_area_frac = 1.0 * crop_area(new_crop) / crop_area(crop) - 1
            improves_f1 = new_f1 > f1
            cheap_gain = remaining_frac > 0.25 and new_area_frac < 0.15
            if not (improves_f1 or cheap_gain):
                continue

            print('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
                i, covered_sum, new_sum, total, remaining_frac,
                crop_area(crop), crop_area(new_crop), area, new_area_frac,
                f1, new_f1))
            crop = new_crop
            covered_sum = new_sum
            del candidates[i]
            # Restart the scan with the enlarged crop.
            break
        else:
            # No remaining contour was worth adding.
            break
    return crop
def pad_crop(crop, contours, edges, border_contour, pad_px=15):
    """Slightly expand the crop to get full contours.

    The crop is padded by `pad_px` on every side. Any contour that the padded
    crop only partially covers is merged into the crop, and the process
    repeats until nothing changes. The result never extends past the border:
    the image bounds when `border_contour` is None or empty, otherwise the
    border contour's bounding box shrunk by 5px on each side.

    Padding is applied once around the final set of contours rather than
    re-applied on every merge, so it does not accumulate.

    Returns an (x1, y1, x2, y2) tuple.
    """
    # edges.shape is (rows, cols), i.e. (height, width): x is bounded by
    # shape[1] and y by shape[0].
    bx1, by1, bx2, by2 = 0, 0, edges.shape[1], edges.shape[0]
    if border_contour is not None and len(border_contour) > 0:
        x, y, w, h = cv2.boundingRect(border_contour)
        bx1, by1, bx2, by2 = x + 5, y + 5, x + w - 1 - 5, y + h - 1 - 5

    def crop_in_border(rect):
        """Pad `rect` by pad_px and clamp it to the border."""
        x1, y1, x2, y2 = rect
        return (max(x1 - pad_px, bx1),
                max(y1 - pad_px, by1),
                min(x2 + pad_px, bx2),
                min(y2 + pad_px, by2))

    # Only bounding boxes are needed here, so compute them directly instead
    # of rasterising every contour.
    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        boxes.append((x, y, x + w - 1, y + h - 1))

    core = tuple(crop)             # unpadded union of everything included
    padded = crop_in_border(core)  # what is actually returned
    changed = True
    while changed:
        changed = False
        for box in boxes:
            box_area = crop_area(box)
            int_area = crop_area(intersect_crops(padded, box))
            # Only contours that are cut by the crop edge need merging.
            if not 0 < int_area < box_area:
                continue
            new_core = union_crops(core, box)
            new_padded = crop_in_border(new_core)
            if new_padded != padded:
                print('%s -> %s' % (str(padded), str(new_padded)))
                core, padded = new_core, new_padded
                changed = True
    return padded
def downscale_image(im, max_dim=2048):
    """Shrink im until its longest dimension is <= max_dim.

    Returns (scale, new_image), where scale <= 1 is the factor applied to
    both dimensions. An image that already fits is returned unchanged with a
    scale of 1.0.
    """
    a, b = im.size
    longest = max(a, b)
    if longest <= max_dim:
        return 1.0, im
    scale = 1.0 * max_dim / longest
    # Keep at least one pixel per side so extremely thin images still resize.
    new_size = (max(1, int(a * scale)), max(1, int(b * scale)))
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    new_im = im.resize(new_size, Image.LANCZOS)
    return scale, new_im
def process_image(path, out_path):
    """Crop the image at `path` to its text region and save it to `out_path`.

    Pipeline: downscale, Canny edge detection, optional removal of a large
    border contour, rank-filter cleanup of thin lines, dilation into a few
    components, selection and padding of the crop, then cropping the
    original-resolution image. Prints a message and writes nothing when no
    components are found.
    """
    # The context manager closes the underlying file once the crop is saved.
    with Image.open(path) as orig_im:
        scale, im = downscale_image(orig_im)

        edges = cv2.Canny(np.asarray(im), 100, 200)

        # TODO: dilate image _before_ finding a border. This is crazy sensitive!
        # findContours returns (image, contours, hierarchy) in OpenCV 3 but
        # (contours, hierarchy) in OpenCV 2 and 4; the contours are always
        # the second-to-last element.
        contours = cv2.findContours(
            edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        borders = find_border_components(contours, edges)
        # Each entry is (index, x1, y1, x2, y2); smallest bounding box first.
        borders.sort(key=lambda b: (b[3] - b[1]) * (b[4] - b[2]))

        border_contour = None
        if borders:
            border_contour = contours[borders[0][0]]
            edges = remove_border(border_contour, edges)

        edges = 255 * (edges > 0).astype(np.uint8)

        # Remove ~1px borders using a rank filter.
        maxed_rows = rank_filter(edges, -4, size=(1, 20))
        maxed_cols = rank_filter(edges, -4, size=(20, 1))
        edges = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)

        contours = find_components(edges)
        if len(contours) == 0:
            print('%s -> (no text!)' % path)
            return

        crop = find_optimal_components_subset(contours, edges)
        crop = pad_crop(crop, contours, edges, border_contour)

        # Upscale to the original image size.
        crop = [int(x / scale) for x in crop]

        text_im = orig_im.crop(crop)
        text_im.save(out_path)
    print('%s -> %s' % (path, out_path))
if __name__ == '__main__':
    # A single argument containing '*' is expanded here (for shells that do
    # not glob) and processed in random order.
    if len(sys.argv) == 2 and '*' in sys.argv[1]:
        files = glob.glob(sys.argv[1])
        random.shuffle(files)
    else:
        files = sys.argv[1:]

    for path in files:
        # Never treat a previous output as a new input.
        if path.endswith('.crop.png'):
            continue
        # Swap only the final extension. str.replace('.jpg', ...) left any
        # non-.jpg path unchanged, so the input itself was taken for an
        # existing output and silently skipped.
        out_path = os.path.splitext(path)[0] + '.crop.png'
        if os.path.exists(out_path):
            continue
        try:
            process_image(path, out_path)
        except Exception as e:
            # Best-effort batch run: report the failure and move on.
            print('%s %s' % (path, e))
@gardneka
Copy link

Hi, Lui!

I hope you do not think this question is dumb, but how do I read the image in to use the program(s) you created? I'm new to Python and haven't done much with programs. I hope my question makes sense!

@lpillmann
Copy link
Author

lpillmann commented Aug 13, 2017

Hi, @gardneka! I hope it's still in time for an answer ;)

This code is "ready" to run as a bash script. So you can just call it as if it were a command (e.g. ls) and give the arguments it needs.

In this case, you have to put ./ before the script's actual name (explanation here). Also, the argument in this case is the path to the image you want to crop, relative to the folder where the script is.

The example provided in line 6 illustrates that:

./crop_morphology.py path/to/image.jpg

This will place the cropped image in path/to/image.crop.png. (Note that it is in the same folder as the original image; it just creates a new image whose name is the original image filename + '.crop.png').

I hope that helps!

Cheers,

@RashmiPandeyatGit
Copy link

Does this file work for handwritten text inside an image?
eassy

@speedfl
Copy link

speedfl commented Sep 28, 2017

Quick question. I am running your script on an image and I am getting this issue:

result10.jpg Required argument 'threshold2' (pos 4) not found

It comes from the first call to Canny in process_image

edges = cv2.Canny(np.asarray(im), 100, 200)

Do you maybe know what is the issue?

@NicoLivesey
Copy link

Hi Lui,

First thank you for this amazing adaptation, it is really great. I am currently trying to modify it for another purpose: I want it to find not only one optimal crop but more in the case where several blocks are too far from each other, the script would return two or more optimal crops. Do you have any clue or advice ?

@DatascientistHadoop
Copy link

Hi, I have been executing the above code in Anaconda Python 3.6 and I get an error like:
only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

@renarios
Copy link

renarios commented Feb 9, 2019

I ran crop_morphology.py on Ubuntu with Python 3.6 and got an error:
/home/hubba/Pictures/image2.jpg not enough values to unpack (expected 3, got 2)
Do you have any idea how to solve this?

@faridelnasire
Copy link

Running into the same "not enough values to unpack" error on MacOS running Python 3.7.0.

@ddendaas
Copy link

Same error as @faridelnasire
not enough values to unpack (expected 3, got 2)

@llemonS
Copy link

llemonS commented Apr 11, 2019

same error here "not enough values to unpack"

@DeepakSuryaS
Copy link

_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # fails with error "not enough values to unpack (expected 3, got 2)"

Change it to:

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

@tclive
Copy link

tclive commented Jun 3, 2019

Thank you, Deepak!!

@elnazsn1988
Copy link

Hi Lui, I've been implementing the code for some time now. I always get borders=[] after every run, and the file in the out path is saved the same as the input file, without any contouring. Contours and edges are both populated, but when I run find_border_components(contours, edges) in the terminal, I get []. I'm debugging it this way because I don't use Ubuntu and am not sure how to run bash files.

@srikanthsampathi
Copy link

permission denied is my error

@bharathsivakumar
Copy link

bharathsivakumar commented Apr 15, 2020

@srikanthsampathi
I hope this answer is still valuable to you. If you are using an Ubuntu system, you need to use the following commands in succession for the bash to work:
sudo chmod +x crop_morphology.py
./crop_morphology.py image_name.png

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment