#!/usr/bin/env python
# http://wolframlanguagereviews.org/2018/10/21/pokemon-card-detector/
import numpy as np
import cv2
from collections import defaultdict
import sys
from matplotlib import pyplot as plt
def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")
    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    # return the ordered coordinates
    return rect
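
# Quick sanity check with made-up corners (not from the source article):
# for these points the coordinate sums are [100, 20, 100, 180], so [10, 10]
# (smallest sum) is top-left and [90, 90] (largest sum) is bottom-right;
# the y - x differences are [-80, 0, 80, 0], so [90, 10] is top-right and
# [10, 90] is bottom-left.
assert np.array_equal(
    order_points(np.array([[90, 10], [10, 10], [10, 90], [90, 90]])),
    np.array([[10, 10], [90, 10], [90, 90], [10, 90]], dtype="float32"))
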
def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect
    # compute the width of the new image, which will be the
    # maximum distance between the bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in top-left, top-right, bottom-right, bottom-left order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    # return the warped image
    return warped
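
# Quick sanity check with a made-up axis-aligned square (not from the
# source article): both candidate widths and heights come out as 160
# pixels, so the warped output should be 160x160.
_check = four_point_transform(
    np.zeros((200, 200, 3), dtype=np.uint8),
    np.array([[20, 20], [180, 20], [180, 180], [20, 180]], dtype="float32"))
assert _check.shape[:2] == (160, 160)
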
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """
    Group lines by their angle using k-means clustering.
    Code from here:
    https://stackoverflow.com/a/46572063/1755401
    """
    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)
    # Get angles in [0, pi] radians
    angles = np.array([line[0][1] for line in lines])
    # Multiply the angles by two and find the coordinates of that angle
    # on the unit circle
    pts = np.array([[np.cos(2*angle), np.sin(2*angle)]
                    for angle in angles], dtype=np.float32)
    # Run k-means
    if sys.version_info[0] == 2:
        # python 2.x
        ret, labels, centers = cv2.kmeans(pts, k, criteria, attempts, flags)
    else:
        # python 3.x, where the signature has changed
        labels, centers = cv2.kmeans(
            pts, k, None, criteria, attempts, flags)[1:]
    labels = labels.reshape(-1)  # Flatten to a 1-D array of labels
    # Segment lines based on their label of 0 or 1
    segmented = defaultdict(list)
    for i, line in enumerate(lines):
        segmented[labels[i]].append(line)
    segmented = list(segmented.values())
    return segmented
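
# Why the angles are doubled: cv2.HoughLines returns theta in [0, pi), so
# two nearly vertical lines can report theta close to 0 and theta close to
# pi even though they are almost parallel. Doubling maps both onto nearby
# points on the unit circle (angles 0 and 2*pi coincide), so k-means puts
# them in the same cluster.
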
def intersection(line1, line2):
    """
    Find the intersection of two lines
    specified in Hesse normal form.
    Returns closest integer pixel locations.
    See here:
    https://stackoverflow.com/a/383527/5087436
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    A = np.array([[np.cos(theta1), np.sin(theta1)],
                  [np.cos(theta2), np.sin(theta2)]])
    b = np.array([[rho1], [rho2]])
    x0, y0 = np.linalg.solve(A, b)
    x0, y0 = int(np.round(x0)), int(np.round(y0))
    return [[x0, y0]]
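
# Quick sanity check with made-up lines (not from the original script): in
# Hesse normal form the vertical line x = 10 is (rho=10, theta=0) and the
# horizontal line y = 20 is (rho=20, theta=pi/2); they cross at (10, 20).
assert intersection([[10, 0]], [[20, np.pi / 2]]) == [[10, 20]]
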
def segmented_intersections(lines):
    """
    Find the intersections between groups of lines.
    """
    intersections = []
    for i, group in enumerate(lines[:-1]):
        for next_group in lines[i+1:]:
            for line1 in group:
                for line2 in next_group:
                    intersections.append(intersection(line1, line2))
    return intersections

def drawLines(img, lines, color=(255, 0, 0)):
    """
    Draw lines on an image.
    """
    for line in lines:
        for rho, theta in line:
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a*rho
            y0 = b*rho
            x1 = int(x0 + 1000*(-b))
            y1 = int(y0 + 1000*(a))
            x2 = int(x0 - 1000*(-b))
            y2 = int(y0 - 1000*(a))
            cv2.line(img, (x1, y1), (x2, y2), color, 1)

def detect_edge(img):
    img_blurred = cv2.GaussianBlur(img, (5, 5), 0)
    canny = cv2.Canny(img_blurred, 100, 200, None, 3)
    return canny
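
# Note on the parameters: the 5x5 Gaussian blur suppresses pixel noise
# before edge detection, and 100/200 are the lower/upper hysteresis
# thresholds for Canny (presumably tuned for the sample image); the
# trailing 3 is the Sobel aperture size.
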
img = cv2.imread('pokemon.png')[:, :, ::-1]  # BGR -> RGB for matplotlib
# cv2.imshow("original image", img)
# cv2.waitKey()
edged = detect_edge(img.copy())
# cv2.imshow("edge detection", edged)
# cv2.waitKey()
contours, hierarchy = cv2.findContours(
    edged, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)
img_h, img_w = np.shape(img)[:2]
CARD_MIN_AREA = (img_w * img_h) * .35
img2 = np.zeros((img_h, img_w, 3), dtype=np.uint8)
largest_item = sorted_contours[0]
hull = cv2.convexHull(largest_item)
size = cv2.contourArea(hull)
peri = cv2.arcLength(hull, True)
epsilon = 0.1 * peri
approx = cv2.approxPolyDP(hull, epsilon, True)
if (size > CARD_MIN_AREA) and (4 <= len(approx) <= 5):
    cv2.drawContours(img2, [hull], -1, (255, 255, 255), 1)
grayimg = cv2.cvtColor(img2, cv2.COLOR_RGB2GRAY)
# cv2.imshow("filtered edges", grayimg)
# cv2.waitKey()
lines = cv2.HoughLines(grayimg, 1, np.pi/83, 73)
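# The angle resolution (np.pi/83) and accumulator threshold (73) look like
# values tuned for this particular sample image; for other inputs the more
# conventional np.pi/180 resolution with a higher threshold may be a better
# starting point.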
# Draw all Hough lines in red
img_with_all_lines = np.copy(img)
drawLines(img_with_all_lines, lines)
# cv2.imshow("Hough lines", img_with_all_lines)
# cv2.waitKey()
# Cluster line angles into 2 groups (vertical and horizontal)
segmented = segment_by_angle_kmeans(lines, 2)
# Find the intersections of each vertical line with each horizontal line
intersections = segmented_intersections(segmented)
img_with_segmented_lines = np.copy(img)
# https://pastiebin.com/5f36425b7ae3d
# Draw intersection points in red
for point in intersections:
    pt = (point[0][0], point[0][1])
    img_with_segmented_lines = cv2.circle(
        img_with_segmented_lines, pt, 3, (255, 0, 0), -1)
# cv2.imshow("Segmented lines", img_with_segmented_lines)
# cv2.waitKey()
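# Note: taking the first four intersections assumes the Hough lines are
# clean enough that these four points are the card's corners; on noisier
# images it may be safer to cluster the intersections (e.g. with k-means)
# and use the four cluster centres instead.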
src_pts = np.array([intersections[0], intersections[1],
                    intersections[2], intersections[3]], dtype=np.int32)
warped = four_point_transform(img, src_pts.reshape(4, 2))
# cv2.imshow("Perpesctive transform", warped)
# cv2.waitKey()
titles = ['Original Image', 'Edge Detection', 'Filtered Edges',
          'Hough Lines', 'Segmented Lines', 'Perspective Transform']
images = [img, edged, grayimg, img_with_all_lines,
          img_with_segmented_lines, warped]
for i in range(len(titles)):
    plt.subplot(2, 3, i+1)
    plt.imshow(images[i], 'gray')
    plt.title(titles[i])
    plt.xticks([]), plt.yticks([])
plt.show()