# similarity-test.py

## similarity-test.py
from cv.bow import BoWEngine
from scipy.spatial.distance import cosine
import cv2

# Shared bag-of-words engine used by the distance functions in this script.
# NOTE(review): kmeans_library is 'faiss' while the dictionary path mentions
# kmcuda -- confirm this pairing is intended.
engine = BoWEngine(
    kmeans_library='faiss',
    dictionary_size=10000,
    dictionary_path='./out/dict-kmcuda-10k',
)

# http://stackoverflow.com/questions/13661051/how-does-vl-ubcmatch-work-technically
# Slow! Could speed up with faiss knn index with k = 2 to get 2nd closest
def ubcmatch(f1, f2, threshold=1.5):
    """Match descriptors of ``f1`` against ``f2`` using a distance ratio test.

    For every descriptor in ``f1`` the closest and second-closest descriptors
    in ``f2`` are found by cosine distance. The closest one is accepted as a
    match only when ``threshold * closest < second_closest``, i.e. when it is
    clearly better than the runner-up.

    Args:
        f1: Iterable of descriptor vectors to find matches for.
        f2: Iterable of candidate descriptor vectors. It is iterated once per
            descriptor of ``f1``, so it must be re-iterable.
        threshold: Factor by which the second-closest distance must exceed
            the closest distance for a match to be accepted.

    Returns:
        A list of ``[desc1, best_match, closest_match_distance]`` entries,
        one per accepted match, in the order of ``f1``.
    """
    matches = []
    for desc1 in f1:
        closest_match_distance = float('inf')
        second_closest_match_distance = float('inf')
        best_match = None
        for desc2 in f2:
            distance = cosine(desc1, desc2)
            if distance < closest_match_distance:
                # New best candidate: the previous best becomes the runner-up.
                second_closest_match_distance = closest_match_distance
                closest_match_distance = distance
                best_match = desc2
            elif distance < second_closest_match_distance:
                # Not the best, but the best runner-up seen so far. Without
                # this branch the runner-up stays too large (often infinite)
                # and ambiguous matches slip through the ratio test.
                second_closest_match_distance = distance
        if best_match is None:
            # No candidate beat the initial infinite distance (e.g. empty f2).
            continue
        if threshold * closest_match_distance < second_closest_match_distance:
            matches.append([desc1, best_match, closest_match_distance])
    return matches

# feature by feature
def ubc_dist(p1, p2):
    """Print and return a feature-by-feature distance between two image files.

    Both images are loaded with ``cv2.imread``, their features are extracted
    with the module-level ``engine`` and matched with ``ubcmatch``. The
    distance is ``1 - len(matches) / len(f1)``: the fraction of the first
    image's features that found no accepted match in the second image.

    Args:
        p1: Path of the first image; its feature count is the denominator.
        p2: Path of the image whose features are matched against.

    Returns:
        The distance as a float; ``1.0`` when the first image yields no
        features at all.

    Raises:
        IOError: If either image cannot be read by ``cv2.imread``.
    """
    im1 = cv2.imread(p1)
    im2 = cv2.imread(p2)
    # cv2.imread signals failure by returning None instead of raising.
    for path, image in ((p1, im1), (p2, im2)):
        if image is None:
            raise IOError('could not read image: %s' % path)
    f1 = engine.extract_features(im1)
    f2 = engine.extract_features(im2)
    matches = ubcmatch(f1, f2)
    feature_count = len(f1)
    if feature_count == 0:
        # Nothing to match: treat as maximally distant rather than dividing
        # by zero.
        d = 1.0
    else:
        # float() keeps this a true division on every Python version.
        d = 1 - (len(matches) / float(feature_count))
    print('ubc_dist(%s, %s) = %.3f' % (p1, p2, d))
    return d

# with bag of words
def bow_dist(p1, p2):
    """Print and return the bag-of-words cosine distance between two images.

    Both images are loaded with ``cv2.imread`` and converted to bag-of-words
    vectors by the module-level ``engine``; the result is the cosine distance
    between those two vectors.

    Args:
        p1: Path of the first image.
        p2: Path of the second image.

    Returns:
        The cosine distance between the two bag-of-words vectors.

    Raises:
        IOError: If either image cannot be read by ``cv2.imread``.
    """
    im1 = cv2.imread(p1)
    im2 = cv2.imread(p2)
    # cv2.imread signals failure by returning None instead of raising.
    for path, image in ((p1, im1), (p2, im2)):
        if image is None:
            raise IOError('could not read image: %s' % path)
    b1 = engine.get_bow_vector(im1)
    b2 = engine.get_bow_vector(im2)
    d = cosine(b1, b2)
    print('bow_dist(%s, %s) = %.3f' % (p1, p2, d))
    return d

# Compare the reference image against itself, an image from the flickr25k
# dataset, and two variants of the reference -- first feature by feature,
# then with bag-of-words vectors.
reference = './images/lenna.jpg'
others = (
    './images/lenna.jpg',
    './datasets/flickr25k/im10055.jpg',
    # Why is lenna-cropped more similar to lenna than lenna-smaller?
    './images/lenna-smaller.jpg',
    './images/lenna-cropped.jpg',
)
for other in others:
    ubc_dist(reference, other)
print('')
for other in others:
    bow_dist(reference, other)
	from cv.bow import BoWEngine
	from scipy.spatial.distance import cosine
	import cv2

	# Shared bag-of-words engine used by the distance functions in this script.
	# NOTE(review): kmeans_library is 'faiss' while the dictionary path mentions
	# kmcuda -- confirm this pairing is intended.
	engine = BoWEngine(
		kmeans_library='faiss',
		dictionary_size=10000,
		dictionary_path='./out/dict-kmcuda-10k',
	)

	# http://stackoverflow.com/questions/13661051/how-does-vl-ubcmatch-work-technically
	# Slow! Could speed up with faiss knn index with k = 2 to get 2nd closest
	def ubcmatch(f1, f2, threshold=1.5):
		"""Match descriptors of ``f1`` against ``f2`` using a distance ratio test.

		For every descriptor in ``f1`` the closest and second-closest descriptors
		in ``f2`` are found by cosine distance. The closest one is accepted as a
		match only when ``threshold * closest < second_closest``, i.e. when it is
		clearly better than the runner-up.

		Args:
			f1: Iterable of descriptor vectors to find matches for.
			f2: Iterable of candidate descriptor vectors. It is iterated once
				per descriptor of ``f1``, so it must be re-iterable.
			threshold: Factor by which the second-closest distance must exceed
				the closest distance for a match to be accepted.

		Returns:
			A list of ``[desc1, best_match, closest_match_distance]`` entries,
			one per accepted match, in the order of ``f1``.
		"""
		matches = []
		for desc1 in f1:
			closest_match_distance = float('inf')
			second_closest_match_distance = float('inf')
			best_match = None
			for desc2 in f2:
				distance = cosine(desc1, desc2)
				if distance < closest_match_distance:
					# New best candidate: the previous best becomes the runner-up.
					second_closest_match_distance = closest_match_distance
					closest_match_distance = distance
					best_match = desc2
				elif distance < second_closest_match_distance:
					# Not the best, but the best runner-up seen so far. Without
					# this branch the runner-up stays too large (often infinite)
					# and ambiguous matches slip through the ratio test.
					second_closest_match_distance = distance
			if best_match is None:
				# No candidate beat the initial infinite distance (e.g. empty f2).
				continue
			if threshold * closest_match_distance < second_closest_match_distance:
				matches.append([desc1, best_match, closest_match_distance])
		return matches

	# feature by feature
	def ubc_dist(p1, p2):
		"""Print and return a feature-by-feature distance between two image files.

		Both images are loaded with ``cv2.imread``, their features are extracted
		with the module-level ``engine`` and matched with ``ubcmatch``. The
		distance is ``1 - len(matches) / len(f1)``: the fraction of the first
		image's features that found no accepted match in the second image.

		Args:
			p1: Path of the first image; its feature count is the denominator.
			p2: Path of the image whose features are matched against.

		Returns:
			The distance as a float; ``1.0`` when the first image yields no
			features at all.

		Raises:
			IOError: If either image cannot be read by ``cv2.imread``.
		"""
		im1 = cv2.imread(p1)
		im2 = cv2.imread(p2)
		# cv2.imread signals failure by returning None instead of raising.
		for path, image in ((p1, im1), (p2, im2)):
			if image is None:
				raise IOError('could not read image: %s' % path)
		f1 = engine.extract_features(im1)
		f2 = engine.extract_features(im2)
		matches = ubcmatch(f1, f2)
		feature_count = len(f1)
		if feature_count == 0:
			# Nothing to match: treat as maximally distant rather than dividing
			# by zero.
			d = 1.0
		else:
			# float() keeps this a true division on every Python version.
			d = 1 - (len(matches) / float(feature_count))
		print('ubc_dist(%s, %s) = %.3f' % (p1, p2, d))
		return d

	# with bag of words
	def bow_dist(p1, p2):
		"""Print and return the bag-of-words cosine distance between two images.

		Both images are loaded with ``cv2.imread`` and converted to bag-of-words
		vectors by the module-level ``engine``; the result is the cosine distance
		between those two vectors.

		Args:
			p1: Path of the first image.
			p2: Path of the second image.

		Returns:
			The cosine distance between the two bag-of-words vectors.

		Raises:
			IOError: If either image cannot be read by ``cv2.imread``.
		"""
		im1 = cv2.imread(p1)
		im2 = cv2.imread(p2)
		# cv2.imread signals failure by returning None instead of raising.
		for path, image in ((p1, im1), (p2, im2)):
			if image is None:
				raise IOError('could not read image: %s' % path)
		b1 = engine.get_bow_vector(im1)
		b2 = engine.get_bow_vector(im2)
		d = cosine(b1, b2)
		print('bow_dist(%s, %s) = %.3f' % (p1, p2, d))
		return d

	# Compare the reference image against itself, an image from the flickr25k
	# dataset, and two variants of the reference -- first feature by feature,
	# then with bag-of-words vectors.
	reference = './images/lenna.jpg'
	others = (
		'./images/lenna.jpg',
		'./datasets/flickr25k/im10055.jpg',
		# Why is lenna-cropped more similar to lenna than lenna-smaller?
		'./images/lenna-smaller.jpg',
		'./images/lenna-cropped.jpg',
	)
	for other in others:
		ubc_dist(reference, other)
	print('')
	for other in others:
		bow_dist(reference, other)