Skip to content

Instantly share code, notes, and snippets.

@johnbumgarner
Last active August 25, 2022 04:41
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnbumgarner/265f3137592a3cf5ba670e84ae404e65 to your computer and use it in GitHub Desktop.
Save johnbumgarner/265f3137592a3cf5ba670e84ae404e65 to your computer and use it in GitHub Desktop.
This function is designed to generate comparison scores between two images using aHash from ImageHash.
######################################################################################
# The concurrent.futures module is part of the standard Python library which provides
# a high level API for launching asynchronous tasks.
######################################################################################
import concurrent.futures
######################################################################################
# The Python module Pillow is the fork of PIL, the Python Imaging Library
# reference: https://pillow.readthedocs.io/en/3.0.x/index.html
######################################################################################
# This module is used to load images
from PIL import Image
#######################################################################################################################
# This Python module was developed by Johannes Buchner
# source: https://github.com/JohannesBuchner/imagehash
#
# The module has 4 hashing methods:
#
# 1. aHash - average hash, for each of the pixels output 1 if the pixel is bigger or equal to the average and
# 0 otherwise.
#
# 2. pHash - perceptual hash, does the same as aHash, but first it does a Discrete Cosine Transformation (DCT)
#
# 3. dHash - gradient hash, calculate the difference for each of the pixel and compares the difference with the
# average differences.
#
# 4. wavelet - wavelet hashing, works in the frequency domain as pHash but it uses Discrete Wavelet Transformation
# (DWT) instead of DCT
#
# aHash, pHash and dHash all use the same approach:
# 1. Scale an image into a grayscale 8x8 image
# 2. Perform some calculations for each of these 64 pixels and assign a binary 1 or 0 value. These 64 bits form
# the output of the algorithm
#
#######################################################################################################################
import imagehash
#############################################################################################
# The os module provides functions for interacting with the operating system.
#
# os.path provides various functions to handle pathnames.
# os.walk() generates the file names in a directory tree by walking the tree.
#############################################################################################
from os import path, walk
def compute_average_hash_computational_score_for_base_image(base_image):
    """
    Calculates the average hash (aHash) for the base image.

    The aHash (also called Mean Hash) algorithm crunches an image into a small grayscale
    thumbnail (8x8 with imagehash's default settings) and sets the 64 bits in the hash based
    on whether each pixel's value is greater than the average color for the image.

    :param base_image: image to calculate the average hash for (anything Image.open accepts,
                       typically a file path)
    :return: primary image hash, as returned by imagehash.average_hash
    :raises OSError: propagated from Image.open when the file is missing or cannot be
                     identified as an image
    """
    # Image.open keeps the underlying file open until the image is closed, so use it as a
    # context manager to release the handle as soon as the hash has been computed.
    with Image.open(base_image) as image:
        return imagehash.average_hash(image)
def compute_average_hash_computational_score_for_comparison_image(comparison_image):
    """
    Calculates the average hash (aHash) for a comparison image.

    The aHash (also called Mean Hash) algorithm crunches an image into a small grayscale
    thumbnail (8x8 with imagehash's default settings) and sets the 64 bits in the hash based
    on whether each pixel's value is greater than the average color for the image.

    :param comparison_image: image to calculate the average hash for (anything Image.open
                             accepts, typically a file path)
    :return: comparison image hash, as returned by imagehash.average_hash
    :raises OSError: propagated from Image.open when the file is missing or cannot be
                     identified as an image
    """
    # Many comparison images are hashed in one run, so close each file deterministically
    # instead of leaving the handle open until garbage collection.
    with Image.open(comparison_image) as image:
        return imagehash.average_hash(image)
def determine_image_similarities(base_hash, comparison_hash):
    """
    Determine the computational score between two different images.

    The score is the result of ``base_hash - comparison_hash``. For imagehash.ImageHash
    operands that subtraction is the Hamming distance between the two hashes (the number of
    differing bits), so 0 means the hashes are identical and larger values mean the images
    are less alike.

    :param base_hash: base image hash
    :param comparison_hash: comparison image hash
    :return: computational score
    """
    return base_hash - comparison_hash
# Directory tree that is searched (recursively) for images to compare against the base image.
image_folder = 'image_directory'

primary_hash = compute_average_hash_computational_score_for_base_image('base_image_name.jpeg')

# File extensions that are hashed; matched case-insensitively so that e.g. 'PHOTO.JPG' is
# not silently skipped.
# NOTE(review): Pillow does not appear to ship an SVG decoder, so a '.svg' file would
# presumably make Image.open raise -- confirm whether '.svg' should stay in this list.
accepted_extensions = ('.bmp', '.gif', '.jpg', '.jpeg', '.png', '.svg', '.tiff')

# Collect every candidate image first so that all of them can be handed to the pool at once.
image_paths = [
    path.join(directory_path, file_name)
    for (directory_path, directory_names, file_names) in walk(image_folder)
    for file_name in file_names
    if file_name.lower().endswith(accepted_extensions)
]

with concurrent.futures.ThreadPoolExecutor(max_workers=10) as thread_pool_executor:
    # Executor.map submits all images up front (so up to 10 are hashed concurrently) and
    # yields the hashes in input order, which keeps the printed scores in directory-walk
    # order. Submitting and immediately waiting on one future per file would serialise the
    # work and leave the pool idle.
    comparison_hashes = thread_pool_executor.map(
        compute_average_hash_computational_score_for_comparison_image, image_paths)
    for hash_to_compare in comparison_hashes:
        comparison_score = determine_image_similarities(primary_hash, hash_to_compare)
        print(comparison_score)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment