Last active
June 20, 2021 11:32
-
-
Save darkedges/a4f30eb1233ad17007a18dd7372b5b5c to your computer and use it in GitHub Desktop.
Pokemon card detector - http://wolframlanguagereviews.org/2018/10/21/pokemon-card-detector/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# http://wolframlanguagereviews.org/2018/10/21/pokemon-card-detector/
import numpy as np | |
import cv2 | |
from collections import defaultdict | |
import sys | |
from matplotlib import pyplot as plt | |
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The ordering uses image coordinates (x to the right, y downwards):
    the top-left corner has the smallest ``x + y`` and the bottom-right the
    largest, while the top-right corner has the smallest ``y - x`` and the
    bottom-left the largest.

    Args:
        pts: Array-like of shape (N, 2) holding ``(x, y)`` coordinates,
            normally the four corners of a quadrilateral. Lists and tuples
            are accepted as well as NumPy arrays.

    Returns:
        A ``(4, 2)`` float32 array ordered
        ``[top-left, top-right, bottom-right, bottom-left]``.

    Raises:
        ValueError: If ``pts`` is empty or is not a 2-D array-like with
            exactly two columns.
    """
    # Accept plain Python sequences too; the dtype is left untouched so the
    # sums and differences are computed exactly as for an ndarray input.
    pts = np.asarray(pts)
    if pts.ndim != 2 or pts.shape[1] != 2 or pts.shape[0] == 0:
        raise ValueError(
            "pts must be a non-empty array-like of shape (N, 2), got shape "
            "%s" % (pts.shape,))

    rect = np.zeros((4, 2), dtype="float32")

    # Smallest coordinate sum -> top-left, largest -> bottom-right.
    coord_sum = pts.sum(axis=1)
    rect[0] = pts[np.argmin(coord_sum)]
    rect[2] = pts[np.argmax(coord_sum)]

    # Smallest (y - x) -> top-right, largest -> bottom-left.
    coord_diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(coord_diff)]
    rect[3] = pts[np.argmax(coord_diff)]

    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        pts: The four corner points of the region, in any order; they are
            sorted with ``order_points`` before use.

    Returns:
        The perspective-corrected ("bird's eye view") image. Its width is the
        longer of the top/bottom edges and its height the longer of the
        left/right edges, both truncated to integers.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_width = max(int(_edge_length(bottom_right, bottom_left)),
                    int(_edge_length(top_right, top_left)))
    # Output height: the longer of the right and left edges.
    out_height = max(int(_edge_length(top_right, bottom_right)),
                     int(_edge_length(top_left, bottom_left)))

    # Destination corners, in the same TL, TR, BR, BL order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group lines by their angle using k-means clustering.

    Code adapted from https://stackoverflow.com/a/46572063/1755401

    Args:
        lines: Sequence of lines where ``line[0]`` is ``(rho, theta)``, the
            layout this script receives from ``cv2.HoughLines``. ``None`` or
            an empty sequence is accepted.
        k: Number of angle clusters to form (default 2).
        **kwargs: Optional overrides passed to ``cv2.kmeans``:
            ``criteria`` (default: EPS + MAX_ITER, 10 iterations, epsilon 1.0),
            ``flags`` (default ``cv2.KMEANS_RANDOM_CENTERS``) and
            ``attempts`` (default 10).

    Returns:
        A list of groups, each a list of the input lines sharing a cluster
        label, in order of first appearance of each label. Returns ``[]``
        when there are no lines to cluster.
    """
    # Nothing to cluster: the caller may hand us None when no lines were
    # detected. Bail out before touching cv2.kmeans, which rejects empty data.
    if lines is None or len(lines) == 0:
        return []

    # Define criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Angles in [0, pi] radians.
    angles = np.array([line[0][1] for line in lines])

    # Map each doubled angle onto the unit circle so that directions near 0
    # and near pi (almost the same line orientation) end up close together.
    doubled = 2 * angles
    pts = np.column_stack(
        (np.cos(doubled), np.sin(doubled))).astype(np.float32)

    # Run k-means; only the labels are needed.
    # NOTE(review): the branch keys on the Python major version as in the
    # original code; the argument order presumably tracks the OpenCV
    # release rather than Python itself - confirm before relying on it.
    if sys.version_info[0] == 2:
        # python 2.x
        _, labels, _ = cv2.kmeans(pts, k, criteria, attempts, flags)
    else:
        # python 3.x, syntax has changed.
        _, labels, _ = cv2.kmeans(pts, k, None, criteria, attempts, flags)
    labels = labels.reshape(-1)  # flatten to one label per line

    # Bucket every line under its cluster label.
    segmented = defaultdict(list)
    for label, line in zip(labels, lines):
        segmented[label].append(line)
    return list(segmented.values())
def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    Each line satisfies ``x*cos(theta) + y*sin(theta) = rho``; the two
    equations are solved as a 2x2 linear system.
    See https://stackoverflow.com/a/383527/5087436

    Args:
        line1: Line where ``line1[0]`` is ``(rho, theta)``.
        line2: Line where ``line2[0]`` is ``(rho, theta)``.

    Returns:
        ``[[x, y]]`` with the intersection rounded to the closest integer
        pixel location (plain Python ints).

    Raises:
        numpy.linalg.LinAlgError: If the system is singular, i.e. the lines
            are parallel.
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    A = np.array([[np.cos(theta1), np.sin(theta1)],
                  [np.cos(theta2), np.sin(theta2)]])
    # Use a 1-D right-hand side so solve() returns scalars; a column vector
    # would yield 1-element arrays, and calling int() on those is deprecated
    # in recent NumPy releases.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    return [[int(np.round(x0)), int(np.round(y0))]]
def segmented_intersections(lines):
    """Collect the crossing points between lines of different groups.

    Every pair of distinct groups is visited once, and each line of the
    earlier group is intersected with each line of the later group. Lines
    within the same group are never intersected with each other.

    Args:
        lines: Sequence of groups, each a sequence of lines accepted by
            ``intersection``.

    Returns:
        A list with one ``intersection`` result per cross-group line pair.
    """
    found = []
    group_count = len(lines)
    for first_idx in range(group_count - 1):
        for second_idx in range(first_idx + 1, group_count):
            found.extend(
                intersection(first_line, second_line)
                for first_line in lines[first_idx]
                for second_line in lines[second_idx]
            )
    return found
def drawLines(img, lines, color=(255, 0, 0), length=1000, thickness=1):
    """Draw lines given in (rho, theta) form onto an image, in place.

    Args:
        img: Image passed to ``cv2.line``; it is modified in place.
        lines: Iterable of lines, each an iterable of ``(rho, theta)`` pairs
            (the layout this script gets from ``cv2.HoughLines``). ``None``
            is treated as "no lines" and nothing is drawn.
        color: Colour tuple forwarded to ``cv2.line``.
        length: Distance in pixels that each segment extends on either side
            of the line's closest point to the origin. Raise it for images
            whose sides exceed the default so that lines still span them.
        thickness: Line thickness forwarded to ``cv2.line``.

    Returns:
        None.
    """
    # No lines detected: nothing to draw.
    if lines is None:
        return

    for line in lines:
        for rho, theta in line:
            a = np.cos(theta)
            b = np.sin(theta)
            # (x0, y0) is the point on the line closest to the origin;
            # (-b, a) is the direction along the line.
            x0 = a * rho
            y0 = b * rho
            start = (int(x0 - length * b), int(y0 + length * a))
            end = (int(x0 + length * b), int(y0 - length * a))
            cv2.line(img, start, end, color, thickness)
def detect_edge(img):
    """Return the Canny edge map of a Gaussian-blurred version of ``img``.

    The image is first smoothed with a 5x5 Gaussian kernel (sigma argument
    0), then passed to ``cv2.Canny`` with thresholds 100 and 200 and an
    aperture size of 3.
    """
    kernel_size = (5, 5)
    low_threshold, high_threshold = 100, 200
    smoothed = cv2.GaussianBlur(img, kernel_size, 0)
    return cv2.Canny(smoothed, low_threshold, high_threshold, None, 3)
# ---------------------------------------------------------------------------
# Script: find the card in 'pokemon.png', rectify it, and plot every stage.
# ---------------------------------------------------------------------------

# cv2.imread returns None instead of raising when the file cannot be read,
# so check before slicing.
bgr_img = cv2.imread('pokemon.png')
if bgr_img is None:
    sys.exit("could not read image 'pokemon.png'")
# Reverse the channel axis (BGR -> RGB) so matplotlib shows true colours.
img = bgr_img[:, :, ::-1]

edged = detect_edge(img.copy())

# Taking the last two return values works whether findContours returns
# (contours, hierarchy) or (image, contours, hierarchy).
contours, hierarchy = cv2.findContours(
    edged, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
if len(contours) == 0:
    sys.exit("no contours found in 'pokemon.png'")
sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)

# NumPy image shape is (rows, cols, ...), i.e. (height, width).
img_h, img_w = np.shape(img)[:2]
# The card outline must cover more than 35% of the image area.
CARD_MIN_AREA = (img_w * img_h) * .35
# Black canvas on which only the card outline is drawn.
img2 = np.zeros((img_h, img_w, 3), dtype=np.uint8)

largest_item = sorted_contours[0]
hull = cv2.convexHull(largest_item)
size = cv2.contourArea(hull)
peri = cv2.arcLength(hull, True)
epsilon = 0.1 * peri
approx = cv2.approxPolyDP(hull, epsilon, True)
# Accept the hull only if it is large enough and simplifies to 4-5 vertices.
if not ((size > CARD_MIN_AREA) and (4 <= len(approx) <= 5)):
    sys.exit("largest contour does not look like a card")
cv2.drawContours(img2, [hull], -1, (255, 255, 255), 1)
grayimg = cv2.cvtColor(img2, cv2.COLOR_RGB2GRAY)

# Hough transform on the outline: rho step 1, theta step pi/83, threshold 73.
lines = cv2.HoughLines(grayimg, 1, np.pi/83, 73)
# HoughLines yields None when nothing is found, and clustering into two
# groups needs at least two lines.
if lines is None or len(lines) < 2:
    sys.exit("not enough Hough lines found on the card outline")

# Draw all Hough lines in red
img_with_all_lines = np.copy(img)
drawLines(img_with_all_lines, lines)

# Cluster line angles into 2 groups (vertical and horizontal)
segmented = segment_by_angle_kmeans(lines, 2)
# Find the intersections of each vertical line with each horizontal line
intersections = segmented_intersections(segmented)

img_with_segmented_lines = np.copy(img)
# https://pastiebin.com/5f36425b7ae3d
# Draw intersection points in red (the image is RGB at this point)
for point in intersections:
    pt = (point[0][0], point[0][1])
    img_with_segmented_lines = cv2.circle(
        img_with_segmented_lines, pt, 3, (255, 0, 0), -1)

if len(intersections) < 4:
    sys.exit("expected at least 4 line intersections, found %d"
             % len(intersections))
# NOTE(review): only the first four intersections are used as corners; if
# more than two lines per group were detected these may not be the four
# card corners - confirm against real inputs.
src_pts = np.array(intersections[:4], dtype=np.int32)
warped = four_point_transform(img, src_pts.reshape(4, 2))

titles = ['Original Image', 'Edge Detection', 'Filtered Edges',
          'Hough Lines', 'Segmented Lines', 'Perspective Transform']
images = [img, edged, grayimg, img_with_all_lines,
          img_with_segmented_lines, warped]
# One subplot per pipeline stage, laid out on a 2x3 grid without axis ticks.
for i, (title, image) in enumerate(zip(titles, images)):
    plt.subplot(2, 3, i + 1)
    plt.imshow(image, 'gray')
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment