Describe the transformation of corner points between two images using SIFT
import perception.hashers.tools as pht
import numpy as np
import cv2
def describe_transformation(image1, image2, sift, matcher, min_match_count=5, min_inliers=0.8):
    """Get a transformation description between two images by using the top-left and
    bottom-right coordinate transformations for each.

    Args:
        image1: The first image
        image2: The second image
        sift: A SIFT feature extractor
        matcher: A BFMatcher instance
        min_match_count: The minimum number of good matches
            required between the two images.
        min_inliers: The minimum fraction of matches that must
            be inliers to the fitted transformation matrix.

    Returns:
        A tuple of ((w1, h1), (w2, h2), (dst1, dst2), error).
        The w and h variables are the widths and heights of the
        input images. dst1 contains the top-left and bottom-right
        corners of image1 projected onto image2; dst2 contains the
        corners of image2 projected onto image1 using the inverse
        homography. error is None on success and an error message
        otherwise.
    """
    image1 = cv2.cvtColor(image1, cv2.COLOR_RGB2GRAY)
    image2 = cv2.cvtColor(image2, cv2.COLOR_RGB2GRAY)
    kp1, des1 = sift.detectAndCompute(image1, None)
    kp2, des2 = sift.detectAndCompute(image2, None)
    matches = matcher.knnMatch(des1, des2, k=2)
    # Apply Lowe's ratio test to keep only distinctive matches.
    good = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:
            good.append(m)
    if len(good) <= min_match_count:
        return None, None, None, f"Not enough matches: {len(good)} / {min_match_count}."
    src = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M12, mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
    if M12 is None:
        return None, None, None, "Could not fit a homography to the matches."
    inliers = mask.sum()
    if inliers < min_inliers * len(mask):
        return None, None, None, f"Not enough inliers: {inliers} / {len(mask)}."
    M21 = np.linalg.inv(M12)
    h1, w1 = image1.shape[:2]
    h2, w2 = image2.shape[:2]
    # Top-left and bottom-right corners of each image.
    pts1x = np.float32([[0, 0], [w1 - 1, h1 - 1]]).reshape(-1, 1, 2)
    pts2x = np.float32([[0, 0], [w2 - 1, h2 - 1]]).reshape(-1, 1, 2)
    # Project each pair of corners onto the other image.
    pts1y = cv2.perspectiveTransform(pts1x, M12).reshape(-1, 2)
    pts2y = cv2.perspectiveTransform(pts2x, M21).reshape(-1, 2)
    return (w1, h1), (w2, h2), (pts1y, pts2y), None
image1 = cv2.resize(pht.read('https://upload.wikimedia.org/wikipedia/commons/8/8d/President_Barack_Obama.jpg'), (512, 512))
# Pad the image by 20px on each side.
image2 = cv2.resize(np.pad(image1, ((20, 20), (20, 20), (0, 0))), (256, 256))
sift = cv2.SIFT_create()
matcher = cv2.BFMatcher()
print(describe_transformation(image1=image1, image2=image2, sift=sift, matcher=matcher))
# Yields the following (values approximate):
#
# (
#     (512, 512), (256, 256),
#     (
#         [[  9,   9], [246, 246]],   # corners of image1 projected onto image2
#         [[-19, -20], [532, 530]]    # corners of image2 projected onto image1
#     ),
#     None
# )
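# Sanity check (a sketch added for illustration, not part of the original
# output): image2 is image1 padded by 20 px per side (512 + 40 = 552 px) and
# resized to 256 px, so coordinates scale by 256 / 552. The top-left corner
# of image1 should land near 20 * 256 / 552 ≈ 9.3 in image2, and the
# bottom-right near (20 + 511) * 256 / 552 ≈ 246.3, matching the output above.
scale = 256 / 552
print(20 * scale, (20 + 511) * scale)  # ~9.28, ~246.26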