Skip to content

Instantly share code, notes, and snippets.

@bemasher
Created March 7, 2022 22:20
Show Gist options
  • Save bemasher/4ebcc965fa5e715c9c1c3fd480d5a184 to your computer and use it in GitHub Desktop.
Save bemasher/4ebcc965fa5e715c9c1c3fd480d5a184 to your computer and use it in GitHub Desktop.
Given an image of a scanned greyscale document with overscan borders, crop and threshold to just the document.
import pathlib
import cv2 as cv
import numpy as np
from skimage.measure import LineModelND, ransac
# Crop and threshold scanned greyscale documents:
# for each input JPEG, find the page outline, fit its four edges with RANSAC,
# intersect them to get the corners, warp to a straight 400dpi page, and
# write a cleaned binary PNG.
srcPath = pathlib.Path('input')
dstPath = pathlib.Path('output')

# 3x3 cross-shaped kernel used to dilate Canny edges and close small gaps.
k = cv.getStructuringElement(cv.MORPH_CROSS, (3, 3))

for src in srcPath.glob('*.jpg'):
    dst = dstPath / src.with_suffix(".png").name
    print(src, '->', dst)

    img = cv.imread(str(src))
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    dy, dx = gray.shape

    # Throw out fine detail so Canny responds mostly to the page boundary.
    blur = cv.medianBlur(gray, 15)
    # Find edges.
    edges = cv.Canny(blur, 0, 100, apertureSize=3)
    # Close any gaps in the page outline.
    edges = cv.morphologyEx(edges, cv.MORPH_DILATE, k)

    # Find contours. CHAIN_APPROX_NONE keeps every boundary pixel so the
    # RANSAC line fits below have dense samples to work with.
    cnts, _ = cv.findContours(
        edges,
        cv.RETR_LIST,
        cv.CHAIN_APPROX_NONE
    )

    # Assume the largest contour is our page.
    page = max(cnts, key=cv.contourArea).reshape(-1, 2)
    d, _ = page.shape
    # Subsample to roughly 100 points per side (4 sides).
    # BUG FIX: max(1, ...) guards against a zero slice step (ValueError)
    # when the contour has fewer than 400 points.
    page = page[::max(1, d // (4 * 100))]

    # Select the outermost quarters in x and y; each band should contain the
    # boundary points of one page edge.
    # x . . x
    # x . . x
    # x . . x
    # x . . x
    left = page[:, 0] < dx // 4
    right = page[:, 0] >= dx // 4 * 3
    # y y y y
    # . . . .
    # . . . .
    # y y y y
    # NOTE: in image coordinates small y is the TOP of the image; the
    # original mislabeled these bands (b/t swapped). Order in `lines`
    # does not matter — every pair is intersected below.
    top_band = page[:, 1] < dy // 4
    bot_band = page[:, 1] >= dy // 4 * 3

    lines = []
    for sel in [left, right, bot_band, top_band]:
        # RANSAC fits a line to each edge band, rejecting outliers such as
        # corner pixels leaking into the band or scanner noise.
        model, _ = ransac(
            page[sel],
            LineModelND,
            min_samples=2,
            residual_threshold=1,
            max_trials=100,
        )
        # params is (origin, direction) for the fitted line.
        lines.append(model.params)

    # Intersect every pair of fitted lines; the four intersections that land
    # inside the image bounds are the page corners.
    corners = []
    for i1, (p1, v1) in enumerate(lines):
        for p2, v2 in lines[i1 + 1:]:
            # Closest-approach intersection of two (origin, direction) lines.
            v = np.subtract(p2, p1)
            vp = np.cross(v1, v2)
            denom = np.linalg.norm(vp) ** 2
            # BUG FIX: exactly parallel lines give denom == 0; the original
            # divided anyway, and the resulting NaN passed the bounds check
            # (NaN comparisons are False) and corrupted the corner list.
            if denom == 0:
                continue
            num = np.dot(np.cross(v, v2), vp)
            x, y = p1 + num / denom * v1
            # Near-parallel lines (opposite page edges) intersect far outside
            # the image; ignore those intersections.
            if x < 0 or x > dx or y < 0 or y > dy:
                continue
            corners.append([x, y])
    c = np.array(corners).reshape(-1, 2)

    # Sort the corners into quadrants relative to the image centre.
    # NOTE(review): assumes exactly one corner per quadrant; extra or missing
    # corners would break getPerspectiveTransform below — verify on bad scans.
    tl = c[(c[:, 0] < dx // 2) & (c[:, 1] < dy // 2)]    # Top-Left
    tr = c[(c[:, 0] >= dx // 2) & (c[:, 1] < dy // 2)]   # Top-Right
    bl = c[(c[:, 0] < dx // 2) & (c[:, 1] >= dy // 2)]   # Bottom-Left
    br = c[(c[:, 0] >= dx // 2) & (c[:, 1] >= dy // 2)]  # Bottom-Right

    # Document dimensions: take the longer of each pair of opposing edges.
    x = max(
        np.hypot(*(tr - tl).ravel()),
        np.hypot(*(br - bl).ravel())
    )
    y = max(
        np.hypot(*(tl - bl).ravel()),
        np.hypot(*(tr - br).ravel())
    )

    # Scale to 400dpi assuming a Letter-sized (11in tall) page: 11 * 400 = 4400.
    scale = 4400 / y
    x = int(x * scale)
    y = 4400

    # Map the detected corners onto an axis-aligned x-by-y rectangle.
    m = cv.getPerspectiveTransform(
        np.array([tl, tr, br, bl], dtype=np.float32),
        np.array([[0, 0], [x, 0], [x, y], [0, y]], dtype=np.float32)
    )
    # BUG FIX: the 4th positional argument of warpPerspective is `dst`, not
    # `flags`, so INTER_LANCZOS4 was silently never applied; pass it by keyword.
    warp = cv.warpPerspective(gray, m, (x, y), flags=cv.INTER_LANCZOS4)

    # Binarize against the local mean over a 255px window, offset by 30.
    thresh = cv.adaptiveThreshold(
        warp, 255,
        cv.ADAPTIVE_THRESH_MEAN_C,
        cv.THRESH_BINARY,
        255, 30
    )
    # Paint over small speckle blobs (size <= 8, value diff <= 64) with white.
    thresh, _ = cv.filterSpeckles(thresh, 255, 8, 64)

    # Erase the outermost margin — residue from imperfect edge detection.
    edgeMargin = 16
    thresh[:edgeMargin, :] = 255
    thresh[-edgeMargin:, :] = 255
    thresh[:, :edgeMargin] = 255
    thresh[:, -edgeMargin:] = 255

    # Write the cleaned page to disk.
    cv.imwrite(str(dst), thresh)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment