@danlmarmot
Created January 19, 2014 19:09
This Python script measures hash performance and attempts to answer a simple question: given a set of hash functions and read block sizes, which combination hashes a file fastest?
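The script expects a reasonably large test file named big.txt in the working directory; the gist does not say how to produce one. Below is a minimal sketch for generating such a file, assuming roughly 50 MB of random data is a representative workload (the make_test_file helper is not part of the gist):

import os

def make_test_file(path="big.txt", size_mb=50):
    # Write size_mb megabytes of pseudo-random bytes so every hash has real work to do.
    with open(path, 'wb') as f:
        for _ in range(size_mb):
            f.write(os.urandom(2 ** 20))  # one MiB per iteration

make_test_file()

With big.txt in place, the gist script below does the measuring.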
__author__ = 'danlmarmot'
"""
This Python script measures hash performance, and attempts to answer this question:
'Given a hash function and a block size for reading, which is the fastest hash function and block size?'
There's no warm up, there's no cache/memory/whatever filling, just a simple way to explore performance for
commonly used functions with the timeit library
"""
import timeit
import hashlib
import zlib
import inspect
import sys

# Constants

# *** Block sizes for reading ***
# These are exponents of two: reads range from 2**MIN_BLOCK_SIZE bytes (1 KiB)
# up to 2**(MAX_BLOCK_SIZE - 1) bytes (8 MiB); sensible defaults.
MIN_BLOCK_SIZE = 10
MAX_BLOCK_SIZE = 24

# Number of times to run each combination of hash and block size
NUM_ITERATIONS = 10

# What file to hash
TEST_FILE = "big.txt"

def main():
    get_timings()

def get_timings():
    # Collect every callable in this module whose name starts with 't_';
    # those are the hash timing functions defined below.
    func_list = [f for f, v in inspect.getmembers(sys.modules['__main__'], callable)
                 if f.startswith('t_')]
    results = {}

    for f in func_list:
        for i in range(MIN_BLOCK_SIZE, MAX_BLOCK_SIZE):
            # Build a statement such as "t_md5sum(filename='big.txt', block_size=2**16)"
            # and time it NUM_ITERATIONS times.
            func = "%s(filename='%s', block_size=2**%s)" % (f, TEST_FILE, i)
            x = timeit.timeit(func, setup="from __main__ import " + f, number=NUM_ITERATIONS)
            print func + ": " + str(x)
            results[func] = x

    print "\nFastest result:"
    print min(results, key=results.get)

def t_md5sum(filename="big.txt", block_size=2 ** 16):
    md5 = hashlib.md5()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            md5.update(chunk)
    return md5.hexdigest()

def t_sha1sum(filename="big.txt", block_size=2 ** 16):
    sha1 = hashlib.sha1()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()

def t_sha224sum(filename="big.txt", block_size=2 ** 16):
    sha = hashlib.sha224()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            sha.update(chunk)
    return sha.hexdigest()

def t_sha256sum(filename="big.txt", block_size=2 ** 16):
    sha = hashlib.sha256()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            sha.update(chunk)
    return sha.hexdigest()

def t_sha384sum(filename="big.txt", block_size=2 ** 16):
    sha = hashlib.sha384()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            sha.update(chunk)
    return sha.hexdigest()

def t_sha512sum(filename="big.txt", block_size=2 ** 16):
    sha = hashlib.sha512()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            sha.update(chunk)
    return sha.hexdigest()

def tno_crc32sum(filename="big.txt", block_size=2 ** 16):
    # The 'tno_' prefix keeps this function out of the timed 't_' set;
    # rename it to 't_crc32sum' to include CRC-32 in the comparison.
    checksum = 0
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            checksum = zlib.crc32(chunk, checksum)
    # 0xffffffff forces the checksum into the range 0 to 2**32 - 1
    return str(checksum & 0xffffffff)

if __name__ == '__main__':
    main()
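
To run the comparison, save the script next to big.txt and invoke it with Python 2 (the print statements are Python 2 syntax). Each printed line is one hash/block-size combination with its total time over NUM_ITERATIONS runs, and the final line names the fastest combination. A single combination can also be measured directly with timeit; a sketch, assuming the script has been saved as hash_timing.py (the gist does not fix a filename):

import timeit

# Time ten md5 passes over big.txt with a 64 KiB read block, mirroring the
# statement string that get_timings() builds for each combination.
t = timeit.timeit("t_md5sum(filename='big.txt', block_size=2**16)",
                  setup="from hash_timing import t_md5sum",
                  number=10)
print t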