Skip to content

Instantly share code, notes, and snippets.

@youngsoul
Created January 1, 2019 03:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save youngsoul/b5014604538b8fddc5c1bc8761fe0efc to your computer and use it in GitHub Desktop.
Save youngsoul/b5014604538b8fddc5c1bc8761fe0efc to your computer and use it in GitHub Desktop.
import argparse
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import distance as dist
"""
https://www.pyimagesearch.com/2014/07/14/3-ways-compare-histograms-using-opencv-python/
My implementation of the blog post above. I believe some of the
library APIs have changed since it was written.
python: 3.6.6
cv2: 3.4.3
"""
# Command-line interface: the single, mandatory option is the directory
# that holds the images to compare.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-d",
    "--dataset",
    required=True,
    help="Path to the directory of images",
)
# vars() turns the parsed Namespace into a plain dict, so the option is
# read later as args["dataset"].
args = vars(ap.parse_args())
# Build two lookup tables keyed by image filename (assumed to be unique):
#   index  -> flattened, normalized color histogram used for comparison
#   images -> RGB copy of the image, used for display with matplotlib
index = {}
images = {}

# loop over the PNG files directly inside the dataset directory
for image_path in glob.glob(os.path.join(args["dataset"], "*.png")):
    # os.path.basename copes with whichever path separator the platform
    # uses; searching for '/' by hand does not
    filename = os.path.basename(image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None
        # instead of raising, so skip it explicitly
        print(f"[WARN] skipping unreadable image: {image_path}")
        continue
    images[filename] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # 3D color histogram with 8 bins per channel (8 * 8 * 8 = 512 bins).
    # It is computed on the array exactly as loaded (before the RGB
    # conversion above); every image goes through the same path, so the
    # histograms remain comparable with each other.
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])

    # cv2.normalize needs both the source and the destination array and
    # returns the destination; store the flattened result so the index
    # holds a 1-D feature vector (previously the flattened copy was
    # computed and then thrown away)
    index[filename] = cv2.normalize(hist, hist).flatten()
# METHOD #1: UTILIZING OPENCV
# (display label, cv2.compareHist method flag) pairs, tried in this order
OPENCV_METHODS = (
    ("Correlation", cv2.HISTCMP_CORREL),
    ("Chi-Squared", cv2.HISTCMP_CHISQR),
    ("Intersection", cv2.HISTCMP_INTERSECT),
    # the Bhattacharyya flag is displayed under the label "Hellinger"
    ("Hellinger", cv2.HISTCMP_BHATTACHARYYA),
)
# Rank every indexed image against the query image once per OpenCV
# metric and lay the ranked images out in one figure per metric.
query_name = "doge.png"
query_hist = index[query_name]

# The query figure is the same for every metric, so draw it once instead
# of adding another axes to it on each pass of the loop.
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images[query_name])
ax.axis("off")

for (methodName, method) in OPENCV_METHODS:
    # correlation and intersection results are sorted in reverse order
    # (largest value first); the other methods are sorted ascending
    reverse = methodName in ("Correlation", "Intersection")

    # compare each indexed histogram with the query histogram and rank
    # the resulting (value, filename) pairs
    results = sorted(
        ((cv2.compareHist(query_hist, hist, method), k) for (k, hist) in index.items()),
        reverse=reverse,
    )

    # one results figure per method, images side by side in rank order
    fig = plt.figure(f"Results: {methodName}")
    fig.suptitle(methodName, fontsize=20)

    for (i, (v, k)) in enumerate(results, start=1):
        # draw through the axes object rather than pyplot's implicit
        # "current axes" state
        ax = fig.add_subplot(1, len(images), i)
        ax.set_title(f"{k}: {v:.2f}")
        ax.imshow(images[k])
        ax.axis("off")

# No plt.show() here on purpose: the OpenCV figures are displayed by the
# next plt.show() call further down the script.
# METHOD #2: UTILIZING SCIPY
# (display label, scipy.spatial.distance function) pairs used to compute
# the distance between two flattened histograms
SCIPY_METHODS = (
    ("Euclidean", dist.euclidean),
    ("Manhattan", dist.cityblock),
    ("Chebyshev", dist.chebyshev),
)
# Rank every indexed image against the query image once per SciPy
# distance function and lay the ranked images out in one figure each.
query_name = "doge.png"
# the distance functions are fed 1-D vectors; flatten the query once up
# front instead of on every comparison
query_vec = index[query_name].flatten()

# The query figure is the same for every metric, so draw it once instead
# of adding another axes to it on each pass of the loop.
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images[query_name])
ax.axis("off")

# loop over the comparison methods
for (methodName, method) in SCIPY_METHODS:
    # compute the distance from the query to each indexed histogram and
    # rank the (distance, filename) pairs, smallest distance first
    results = sorted(
        (method(query_vec, hist.flatten()), k) for (k, hist) in index.items()
    )

    # one results figure per method, images side by side in rank order
    fig = plt.figure(f"Results: {methodName}")
    fig.suptitle(methodName, fontsize=20)

    for (i, (v, k)) in enumerate(results, start=1):
        # draw through the axes object rather than pyplot's implicit
        # "current axes" state
        ax = fig.add_subplot(1, len(images), i)
        ax.set_title(f"{k}: {v:.2f}")
        ax.imshow(images[k])
        ax.axis("off")

# show the SciPy methods
plt.show()
# METHOD #3: ROLL YOUR OWN
def chi2_distance(histA, histB, eps=1e-10):
    """Return the chi-squared distance between two histograms.

    Evaluates ``0.5 * sum((a - b) ** 2 / (a + b + eps))`` over all
    corresponding bins, as a single vectorized NumPy expression.

    Args:
        histA: First histogram; array-like of any shape.
        histB: Second histogram; must have the same shape as ``histA``.
        eps: Small constant added to every denominator so that a bin
            which is empty in both histograms does not divide by zero.

    Returns:
        The chi-squared distance as a NumPy float. Identical
        non-negative histograms give 0.0.

    Raises:
        ValueError: If the two histograms do not have the same shape.
            (Pairing the bins with ``zip`` would silently ignore the
            surplus bins of the longer histogram.)
    """
    a = np.asarray(histA, dtype=np.float64)
    b = np.asarray(histB, dtype=np.float64)
    if a.shape != b.shape:
        raise ValueError(
            f"histogram shapes differ: {a.shape} vs {b.shape}"
        )
    # compute the chi-squared distance over every bin at once
    return 0.5 * np.sum((a - b) ** 2 / (a + b + eps))
# Score every indexed histogram against the query histogram with the
# hand-written chi-squared distance, keyed by filename.
results = {
    name: chi2_distance(index["doge.png"], histogram)
    for (name, histogram) in index.items()
}
# rank as (distance, filename) pairs, smallest distance first
results = sorted((score, name) for (name, score) in results.items())

# draw the query image on its own figure
fig = plt.figure("Query")
ax = fig.add_subplot(1, 1, 1)
ax.imshow(images["doge.png"])
plt.axis("off")

# one figure holding the ranked images side by side
fig = plt.figure("Results: Custom Chi-Squared")
fig.suptitle("Custom Chi-Squared", fontsize=20)
column_count = len(images)
for (position, (score, name)) in enumerate(results, start=1):
    # each result gets its own column, titled with filename and distance
    ax = fig.add_subplot(1, column_count, position)
    ax.set_title("%s: %.2f" % (name, score))
    plt.imshow(images[name])
    plt.axis("off")

# show the custom method
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment