Skip to content

Instantly share code, notes, and snippets.

@fabiolimace
Last active September 21, 2021 11:17
Show Gist options
  • Save fabiolimace/507eac3d35900050eeb9772e5b1871ba to your computer and use it in GitHub Desktop.
Save fabiolimace/507eac3d35900050eeb9772e5b1871ba to your computer and use it in GitHub Desktop.
Compare hashes in Python
#!/usr/bin/python3
import zlib
import base64
import random
import string
import hashlib
def crc32(text):
    """Return the CRC-32 checksum of *text*.

    The string is encoded with ``str.encode()`` (UTF-8 by default) and the
    resulting bytes are passed to ``zlib.crc32``, which in Python 3 yields
    an unsigned 32-bit integer.
    """
    return zlib.crc32(text.encode())
def adl32(text):
    """Return the Adler-32 checksum of *text*.

    The string is encoded with ``str.encode()`` (UTF-8 by default) and the
    resulting bytes are passed to ``zlib.adler32``, which in Python 3 yields
    an unsigned 32-bit integer.
    """
    return zlib.adler32(text.encode())
def mix32(text):
    """Return CRC-32 XOR Adler-32 of *text* as a single 32-bit value.

    Both checksums are computed over the same encoded bytes
    (``str.encode()``, UTF-8 by default).
    """
    # Encode once and feed the same bytes to both checksums.
    data = text.encode()
    return zlib.crc32(data) ^ zlib.adler32(data)
def mix64(text):
    """Return a 64-bit value combining CRC-32 and Adler-32 of *text*.

    The CRC-32 occupies the upper 32 bits and the Adler-32 the lower 32
    bits. Both checksums are computed over the same encoded bytes
    (``str.encode()``, UTF-8 by default).
    """
    # Encode once and feed the same bytes to both checksums.
    data = text.encode()
    # Parentheses spell out the precedence the expression already relies on:
    # the shift happens before the bitwise OR.
    return (zlib.crc32(data) << 32) | zlib.adler32(data)
def pyt32(text):
    """Return Python's built-in ``hash(text)`` shifted right by 32 bits.

    ``hash()`` returns a signed integer, and ``>>`` on a negative int keeps
    the sign, so the result may be negative.

    NOTE: ``str`` hashes are salted per interpreter process unless
    ``PYTHONHASHSEED`` is set, so values are only comparable within a
    single run.
    NOTE(review): the shift by 32 presumably assumes a build whose hash
    values are 64 bits wide; on a narrower build the result would carry
    almost no information -- confirm the target platform.
    """
    return hash(text) >> 32
def pyt64(text):
    """Return Python's built-in ``hash(text)`` unchanged.

    The value is a signed integer and may be negative.

    NOTE: ``str`` hashes are salted per interpreter process unless
    ``PYTHONHASHSEED`` is set, so values are only comparable within a
    single run.
    """
    return hash(text)
def sha32(text):
    """Return the first 4 bytes of the SHA-1 digest of *text* as an integer.

    The string is encoded with ``str.encode()`` (UTF-8 by default); the
    leading 4 digest bytes are interpreted as a big-endian unsigned integer.
    """
    digest = hashlib.sha1(text.encode()).digest()
    return int.from_bytes(digest[:4], byteorder='big')
def sha64(text):
    """Return the first 8 bytes of the SHA-1 digest of *text* as an integer.

    The string is encoded with ``str.encode()`` (UTF-8 by default); the
    leading 8 digest bytes are interpreted as a big-endian unsigned integer.
    """
    digest = hashlib.sha1(text.encode()).digest()
    return int.from_bytes(digest[:8], byteorder='big')
def random_string():
    """Return a random string of 1 to 15 lowercase ASCII letters.

    The length is drawn uniformly from 1..15 inclusive, then each character
    is drawn (with replacement) from ``string.ascii_lowercase``. Uses the
    module-level ``random`` generator, so results follow ``random.seed``.
    """
    # randint is inclusive on both ends: same range as range(1, 16).
    length = random.randint(1, 15)
    return ''.join(random.choices(string.ascii_lowercase, k=length))
def random_strings(size):
    """Return a set of *size* distinct strings produced by ``random_string()``.

    Strings are generated until the set holds *size* members; duplicates
    are absorbed by the set, so more than *size* draws may be made. A
    *size* of zero or less yields an empty set.

    NOTE: the loop only ends once *size* distinct strings have been seen,
    so *size* must not exceed the number of distinct strings that
    ``random_string()`` can produce.
    """
    strings = set()
    while len(strings) < size:
        strings.add(random_string())
    return strings
# Benchmark driver: hash the same set of distinct random strings with every
# hash function above and report, per function, the fraction of hash values
# that are distinct (1.0 means no collisions were observed).
size = 10**7 # 10,000,000
strings = random_strings(size)

# One list of hash values per function, all computed over the same strings.
crc32_hashes = [crc32(s) for s in strings]
adl32_hashes = [adl32(s) for s in strings]
mix32_hashes = [mix32(s) for s in strings]
mix64_hashes = [mix64(s) for s in strings]
pyt32_hashes = [pyt32(s) for s in strings]
pyt64_hashes = [pyt64(s) for s in strings]
sha32_hashes = [sha32(s) for s in strings]
sha64_hashes = [sha64(s) for s in strings]


def _uniqueness(hashes):
    """Return the fraction of distinct values in *hashes*."""
    # "/" is true division in Python 3, so no float coercion is needed.
    return len(set(hashes)) / len(hashes)


print()
print('SIZE: ', size)
print()
print('UNIQUENESS 32:')
print(' - CRC32 ', _uniqueness(crc32_hashes))
print(' - ADL32 ', _uniqueness(adl32_hashes))
print(' - CRC32 ^ ADL32 ', _uniqueness(mix32_hashes))
print(' - PYTHON HASH 32 ', _uniqueness(pyt32_hashes))
print(' - SHA1 HASH 32 ', _uniqueness(sha32_hashes))
print()
print('UNIQUENESS 64:')
print(' - CRC32<<32 | ADL32 ', _uniqueness(mix64_hashes))
print(' - PYTHON HASH 64 ', _uniqueness(pyt64_hashes))
print(' - SHA1 HASH 64 ', _uniqueness(sha64_hashes))
print()
@fabiolimace
Copy link
Author

fabiolimace commented Sep 5, 2021

OUTPUT:

SIZE:  10000000

UNIQUENESS 32:
 - CRC32              0.9987718
 - ADL32              0.055102
 - CRC32 ^ ADL32      0.9988351
 - PYTHON HASH 32     0.9988358
 - SHA1 HASH 32       0.9988395

UNIQUENESS 64:
 - CRC32<<32 | ADL32  1.0
 - PYTHON HASH 64     1.0
 - SHA1 HASH 64       1.0

TIME:

real	2m15,187s
user	2m9,221s
sys	0m4,342s

@fabiolimace
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment