Skip to content

Instantly share code, notes, and snippets.

@imneonizer
Created May 1, 2021 18:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save imneonizer/11ec411605f72c0f3086f1de4c6c41ed to your computer and use it in GitHub Desktop.
Save imneonizer/11ec411605f72c0f3086f1de4c6c41ed to your computer and use it in GitHub Desktop.
Image hashing techniques
import os
import cv2
import hashlib
import numpy as np
import scipy.fftpack
class HashEngine:
    """Compute image digests: ahash, dhash, phash, or any hashlib algorithm.

    Every digest is returned as a lowercase hex string. ``htype`` selects the
    default algorithm used by :meth:`hexdigest`; ``hash_size`` controls the
    size of the down-scaled image that all algorithms operate on.
    """

    def __init__(self, htype="dhash", hash_size=8):
        self.hash_size = hash_size
        self.htype = htype

    def load_image(self, image):
        """Return ``image`` as a small single-channel array.

        Args:
            image: either a path (``str``), read from disk in grayscale mode,
                or an already-decoded array. Arrays with three or more
                dimensions are converted with ``cv2.COLOR_BGR2GRAY``.

        Returns:
            The image resized to ``hash_size`` rows by ``hash_size + 1``
            columns (the extra column is what ``dhash`` needs to produce
            ``hash_size`` horizontal differences per row).

        Raises:
            OSError: if ``image`` is a path that OpenCV cannot read.
        """
        if isinstance(image, str):
            path = image
            image = cv2.imread(path, 0)
            if image is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with a message that names the file.
                raise OSError("could not read image file: {!r}".format(path))
        if len(image.shape) >= 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return cv2.resize(image, (self.hash_size + 1, self.hash_size))

    @staticmethod
    def arr2hex(arr):
        """Pack a boolean array into a hex string, 8 flattened values per byte.

        Within each byte the first value is the least significant bit.
        Trailing values that do not fill a whole byte are dropped.
        """
        byte, chunks = 0, []
        for index, value in enumerate(arr.flatten()):
            if value:
                byte += 2 ** (index % 8)
            if index % 8 == 7:
                chunks.append(hex(byte)[2:].rjust(2, '0'))
                byte = 0
        return "".join(chunks)

    @staticmethod
    def hex2arr(hexstr):
        """Unpack a 16-character hex string into an 8x8 boolean array.

        Inverse of :meth:`arr2hex` for 64-bit hashes: row ``r`` holds the bits
        of byte ``r``, least significant bit first.

        Raises:
            ValueError: if ``hexstr`` is not exactly 16 characters long.
        """
        if len(hexstr) != 16:
            raise ValueError('The hex string has the wrong length')
        rows = []
        for row in range(8):
            pair = hexstr[row * 2:row * 2 + 2]
            value = int("0x" + pair, 16)
            rows.append([value & 2 ** bit > 0 for bit in range(8)])
        return np.array(rows)

    @staticmethod
    def hex2int(hexstr):
        """Return the integer value of a hex digest string."""
        return int(hexstr, 16)

    @staticmethod
    def distance(a, b):
        """Return the Hamming distance (number of differing bits) of two digests.

        Each argument may be an integer or a hex string, in any combination.
        Strings are always parsed as hexadecimal: trying ``int(a)`` first would
        silently read an all-digit hex digest such as ``"10"`` as decimal 10
        rather than 16 and produce a wrong distance.
        """
        if isinstance(a, str):
            a = int(a, 16)
        if isinstance(b, str):
            b = int(b, 16)
        return bin(int(a) ^ int(b)).count("1")

    def ahash(self, image):
        """Average hash: one bit per pixel, set when the pixel exceeds the mean.

        The digest is cut to ``hash_size ** 2`` bits (whole bytes only), so it
        is 16 hex characters for the default ``hash_size`` of 8.
        """
        image = self.load_image(image)
        avg = image.mean()
        diff = image > avg
        # NOTE(review): load_image returns hash_size + 1 columns, so the mean
        # covers the extra column and the kept bits are the first
        # hash_size ** 2 values in row-major order. Left as is so that digests
        # produced with the default size do not change.
        hex_length = 2 * (self.hash_size ** 2 // 8)
        return self.arr2hex(diff)[:hex_length]

    def dhash(self, image):
        """Difference hash: one bit per pixel, set when it exceeds its left neighbour."""
        image = self.load_image(image)
        diff = image[:, 1:] > image[:, :-1]
        return self.arr2hex(diff)

    def phash(self, image):
        """Perceptual hash: 2-D DCT coefficients thresholded at their median."""
        image = self.load_image(image)
        # Separable 2-D DCT: transform along axis 0, then along axis 1.
        dct = scipy.fftpack.dct(scipy.fftpack.dct(image, axis=0), axis=1)
        dctlowfreq = dct[:self.hash_size, :self.hash_size]
        med = np.median(dctlowfreq)
        diff = dctlowfreq > med
        return self.arr2hex(diff)

    def hexdigest(self, image, htype=None):
        """Return the hex digest of ``image``.

        Args:
            image: path or array, as accepted by :meth:`load_image`.
            htype: ``"ahash"``, ``"dhash"``, ``"phash"`` or any name accepted
                by ``hashlib.new``. Falls back to the instance default when
                omitted.
        """
        htype = htype or self.htype
        perceptual = {
            "ahash": self.ahash,
            "dhash": self.dhash,
            "phash": self.phash,
        }
        if htype in perceptual:
            return perceptual[htype](image)
        # All other names go to hashlib; the digest is computed over the
        # resized grayscale pixels, not over the original file contents.
        algorithm = hashlib.new(htype)
        algorithm.update(self.load_image(image))
        return algorithm.hexdigest()
def main():
    """Demo: digest the same file twice and print both digests and their distance.

    Both digests come from "sample3.jpg", so the reported distances are 0.
    """
    engine = HashEngine("sha1")

    hexd1 = engine.hexdigest("sample3.jpg")
    intd1 = engine.hex2int(hexd1)
    print("hexdigest1:", hexd1)
    print("intdigest1:", intd1)
    print("-"*10)

    hexd2 = engine.hexdigest("sample3.jpg")
    intd2 = engine.hex2int(hexd2)
    print("hexdigest2:", hexd2)
    print("intdigest2:", intd2)
    print("-"*10)

    # distance() accepts the digests either as hex strings or as integers.
    hexdistance = engine.distance(hexd1, hexd2)
    print("hexdistance:", hexdistance)
    intdistance = engine.distance(intd1, intd2)
    print("intdistance:", intdistance)


# Run the demo only when executed as a script, so importing this module does
# not try to read "sample3.jpg".
if __name__ == "__main__":
    main()
# hexdigest1: 576f176179c8e3fcb88e86b7f97d82e9b9e2b24f
# intdigest1: 499159616530556630584974124528933127117353431631
# ----------
# hexdigest2: 576f176179c8e3fcb88e86b7f97d82e9b9e2b24f
# intdigest2: 499159616530556630584974124528933127117353431631
# ----------
# hexdistance: 0
# intdistance: 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment