Created
December 25, 2018 08:33
-
-
Save ayulockin/0707d96a5c0df965482210c47f88ca8d to your computer and use it in GitHub Desktop.
Code to implement a basic document scanner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
from scipy.spatial import distance as dist | |
import numpy as np | |
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize an image to a target width or height, preserving aspect ratio.

    Args:
        image: source image (numpy array, shape (h, w[, channels])).
        width: desired output width in pixels; height is derived from it.
        height: desired output height in pixels; used when width is None.
        inter: OpenCV interpolation flag (INTER_AREA suits downscaling).

    Returns:
        The resized image, or the original image untouched when neither
        width nor height is given.
    """
    (h, w) = image.shape[:2]
    # guard: with both dimensions None the arithmetic below would raise
    # TypeError (None / float) — just hand the image back unchanged
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)  # scale ratio from the requested height
        dim = (int(w * r), height)
    else:
        r = width / float(w)  # scale ratio from the requested width
        dim = (width, int(h * r))
    return cv2.resize(image, dim, interpolation=inter)
def order_points(pts):
    """Order four points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: array of shape (4, 2) holding the corner coordinates in any order.

    Returns:
        float32 array of shape (4, 2) in (tl, tr, br, bl) order.
    """
    # split the corners into the two left-most and two right-most points
    by_x = pts[np.argsort(pts[:, 0]), :]
    left_pair = by_x[:2, :]
    right_pair = by_x[2:, :]

    # within the left pair, the smaller y is the top-left corner
    left_pair = left_pair[np.argsort(left_pair[:, 1]), :]
    (tl, bl) = left_pair

    # of the right pair, the point farther (Euclidean) from the top-left
    # anchor must be the bottom-right corner; the nearer one is top-right
    dists = np.sqrt(((right_pair - tl) ** 2).sum(axis=1))
    (br, tr) = right_pair[np.argsort(dists)[::-1], :]

    return np.array([tl, tr, br, bl], dtype="float32")
def four_point_transform(image, pts):
    """Warp the quadrilateral given by *pts* into a top-down rectangle.

    Args:
        image: source image to sample from.
        pts: four corner points, shape (4, 2), in any order.

    Returns:
        The perspective-corrected ("bird's eye view") crop of the quad.
    """
    # canonicalise the corner order: top-left, top-right, bottom-right, bottom-left
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # output width is the longer of the top and bottom edges
    bottom_w = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    top_w = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    out_w = max(int(bottom_w), int(top_w))

    # output height is the longer of the left and right edges
    right_h = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    left_h = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    out_h = max(int(right_h), int(left_h))

    # destination rectangle in the same (tl, tr, br, bl) order
    dst = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]],
        dtype="float32")

    # compute the homography and resample the source image through it
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (out_w, out_h))
def auto_canny(image, sigma=0.33):
    """Canny edge detection with thresholds derived from the image itself.

    Args:
        image: single-channel image.
        sigma: fractional band around the median used for the two thresholds.

    Returns:
        Binary edge map from cv2.Canny.
    """
    # centre the threshold band on the median pixel intensity,
    # clamped to the valid 8-bit range [0, 255]
    med = np.median(image)
    low = int(max(0, (1.0 - sigma) * med))
    high = int(min(255, (1.0 + sigma) * med))
    return cv2.Canny(image, low, high)
# --- document-scanner pipeline: load, edge-detect, find the page quad,
# --- warp it flat, and save/show the result
image = cv2.imread('emp2.jpg')
if image is None:
    # cv2.imread returns None (no exception) when the file is missing/unreadable
    raise FileNotFoundError("could not read 'emp2.jpg'")
orig = image.copy()
# remember how much we shrink so contour coordinates can be scaled back up
ratio = image.shape[0] / 500.0

# work on a small copy: faster, and contour detection is more stable
resized = resize(image, height=500)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
# light Gaussian blur suppresses texture noise before edge detection
blur = cv2.GaussianBlur(gray, (5, 5), 0)
edges = cv2.Canny(blur, 100, 250)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x — unpack in a version-agnostic way
found = cv2.findContours(edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# keep only the 5 largest contours; the page should be among them
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

# look for the first contour that simplifies to a quadrilateral
screenCnt = None
for c in contours:
    peri = cv2.arcLength(c, True)
    # 2% of the perimeter is the usual tolerance for polygon approximation
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # previously this fell through to a NameError at the transform call
    raise RuntimeError("no 4-point document contour found in the image")

draw = cv2.drawContours(resized, contours, -1, (0, 255, 0), 3)
# scale the quad back to full-resolution coordinates before warping
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
# grayscale the warp for a 'scanned paper' look
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

# display intermediate and final results
cv2.imshow('edged', edges)
cv2.imshow('draw', draw)
cv2.imshow("Original", resize(orig, height=450))
cv2.imshow("Scanned", resize(warped, height=450))

# save results to disk
cv2.imwrite('original.jpg', resize(orig, height=500))
cv2.imwrite('scan.jpg', resize(warped, height=500))

# block until a key is pressed, then tear down the windows —
# without this the imshow windows close immediately
cv2.waitKey(0)
cv2.destroyAllWindows()
Author
ayulockin
commented
Dec 25, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment