Skip to content

Instantly share code, notes, and snippets.

@bemasher
Created March 7, 2022 22:20
Show Gist options
  • Save bemasher/4ebcc965fa5e715c9c1c3fd480d5a184 to your computer and use it in GitHub Desktop.
Save bemasher/4ebcc965fa5e715c9c1c3fd480d5a184 to your computer and use it in GitHub Desktop.
Given an image of a scanned greyscale document with overscan borders, crop and threshold to just the document.
import pathlib
import cv2 as cv
import numpy as np
from skimage.measure import LineModelND, ransac
# Crop and threshold scanned greyscale documents:
# for each input JPEG, find the page outline, fit its four edges with RANSAC,
# intersect them to get the corners, warp to a straight 400dpi page, and
# write a cleaned binary PNG.
srcPath = pathlib.Path('input')
dstPath = pathlib.Path('output')

# 3x3 cross-shaped kernel used to dilate Canny edges and close small gaps.
k = cv.getStructuringElement(cv.MORPH_CROSS, (3, 3))

for src in srcPath.glob('*.jpg'):
    dst = dstPath / src.with_suffix(".png").name
    print(src, '->', dst)

    img = cv.imread(str(src))
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    dy, dx = gray.shape

    # Throw out fine detail so Canny responds mostly to the page boundary.
    blur = cv.medianBlur(gray, 15)
    # Find edges.
    edges = cv.Canny(blur, 0, 100, apertureSize=3)
    # Close any gaps in the page outline.
    edges = cv.morphologyEx(edges, cv.MORPH_DILATE, k)

    # Find contours. CHAIN_APPROX_NONE keeps every boundary pixel so the
    # RANSAC line fits below have dense samples to work with.
    cnts, _ = cv.findContours(
        edges,
        cv.RETR_LIST,
        cv.CHAIN_APPROX_NONE
    )

    # Assume the largest contour is our page.
    page = max(cnts, key=cv.contourArea).reshape(-1, 2)
    d, _ = page.shape
    # Subsample to roughly 100 points per side (4 sides).
    # BUG FIX: max(1, ...) guards against a zero slice step (ValueError)
    # when the contour has fewer than 400 points.
    page = page[::max(1, d // (4 * 100))]

    # Select the outermost quarters in x and y; each band should contain the
    # boundary points of one page edge.
    # x . . x
    # x . . x
    # x . . x
    # x . . x
    left = page[:, 0] < dx // 4
    right = page[:, 0] >= dx // 4 * 3
    # y y y y
    # . . . .
    # . . . .
    # y y y y
    # NOTE: in image coordinates small y is the TOP of the image; the
    # original mislabeled these bands (b/t swapped). Order in `lines`
    # does not matter — every pair is intersected below.
    top_band = page[:, 1] < dy // 4
    bot_band = page[:, 1] >= dy // 4 * 3

    lines = []
    for sel in [left, right, bot_band, top_band]:
        # RANSAC fits a line to each edge band, rejecting outliers such as
        # corner pixels leaking into the band or scanner noise.
        model, _ = ransac(
            page[sel],
            LineModelND,
            min_samples=2,
            residual_threshold=1,
            max_trials=100,
        )
        # params is (origin, direction) for the fitted line.
        lines.append(model.params)

    # Intersect every pair of fitted lines; the four intersections that land
    # inside the image bounds are the page corners.
    corners = []
    for i1, (p1, v1) in enumerate(lines):
        for p2, v2 in lines[i1 + 1:]:
            # Closest-approach intersection of two (origin, direction) lines.
            v = np.subtract(p2, p1)
            vp = np.cross(v1, v2)
            denom = np.linalg.norm(vp) ** 2
            # BUG FIX: exactly parallel lines give denom == 0; the original
            # divided anyway, and the resulting NaN passed the bounds check
            # (NaN comparisons are False) and corrupted the corner list.
            if denom == 0:
                continue
            num = np.dot(np.cross(v, v2), vp)
            x, y = p1 + num / denom * v1
            # Near-parallel lines (opposite page edges) intersect far outside
            # the image; ignore those intersections.
            if x < 0 or x > dx or y < 0 or y > dy:
                continue
            corners.append([x, y])
    c = np.array(corners).reshape(-1, 2)

    # Sort the corners into quadrants relative to the image centre.
    # NOTE(review): assumes exactly one corner per quadrant; extra or missing
    # corners would break getPerspectiveTransform below — verify on bad scans.
    tl = c[(c[:, 0] < dx // 2) & (c[:, 1] < dy // 2)]    # Top-Left
    tr = c[(c[:, 0] >= dx // 2) & (c[:, 1] < dy // 2)]   # Top-Right
    bl = c[(c[:, 0] < dx // 2) & (c[:, 1] >= dy // 2)]   # Bottom-Left
    br = c[(c[:, 0] >= dx // 2) & (c[:, 1] >= dy // 2)]  # Bottom-Right

    # Document dimensions: take the longer of each pair of opposing edges.
    x = max(
        np.hypot(*(tr - tl).ravel()),
        np.hypot(*(br - bl).ravel())
    )
    y = max(
        np.hypot(*(tl - bl).ravel()),
        np.hypot(*(tr - br).ravel())
    )

    # Scale to 400dpi assuming a Letter-sized (11in tall) page: 11 * 400 = 4400.
    scale = 4400 / y
    x = int(x * scale)
    y = 4400

    # Map the detected corners onto an axis-aligned x-by-y rectangle.
    m = cv.getPerspectiveTransform(
        np.array([tl, tr, br, bl], dtype=np.float32),
        np.array([[0, 0], [x, 0], [x, y], [0, y]], dtype=np.float32)
    )
    # BUG FIX: the 4th positional argument of warpPerspective is `dst`, not
    # `flags`, so INTER_LANCZOS4 was silently never applied; pass it by keyword.
    warp = cv.warpPerspective(gray, m, (x, y), flags=cv.INTER_LANCZOS4)

    # Binarize against the local mean over a 255px window, offset by 30.
    thresh = cv.adaptiveThreshold(
        warp, 255,
        cv.ADAPTIVE_THRESH_MEAN_C,
        cv.THRESH_BINARY,
        255, 30
    )
    # Paint over small speckle blobs (size <= 8, value diff <= 64) with white.
    thresh, _ = cv.filterSpeckles(thresh, 255, 8, 64)

    # Erase the outermost margin — residue from imperfect edge detection.
    edgeMargin = 16
    thresh[:edgeMargin, :] = 255
    thresh[-edgeMargin:, :] = 255
    thresh[:, :edgeMargin] = 255
    thresh[:, -edgeMargin:] = 255

    # Write the cleaned page to disk.
    cv.imwrite(str(dst), thresh)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment