Skip to content

Instantly share code, notes, and snippets.

@SaveTheRbtz
Forked from verm666/gist:2049204
Created March 19, 2012 15:55
Show Gist options
  • Save SaveTheRbtz/2117117 to your computer and use it in GitHub Desktop.
Save SaveTheRbtz/2117117 to your computer and use it in GitHub Desktop.
djb2.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import Counter
from contextlib import contextmanager
def djb2(uuid):
    """Return the djb2 hash of ``uuid`` as an integer.

    See for details: http://www.cse.yorku.ca/~oz/hash.html

    The running value starts at 5381; for every character it is multiplied
    by 33 (written as ``(h << 5) + h``) and the character code is added.
    The result is NOT truncated to a machine word, so it keeps growing with
    the length of the input.

    ``uuid`` may be a text string or a byte string.  Returns 5381 for an
    empty input.
    """
    _hash = 5381
    for char in uuid:
        # Iterating ``bytes``/``bytearray`` on Python 3 already yields
        # integers; text (and Python 2 ``str``) yields 1-character strings.
        code = char if isinstance(char, int) else ord(char)
        _hash = ((_hash << 5) + _hash) + code
    return _hash
def stats(iterable):
    """Return ``[mean, median, std.dev]`` of the values in ``iterable``.

    When numpy can be imported the three values come from ``np.mean``,
    ``np.median`` and ``np.std``.  Otherwise they are computed in pure
    Python (population standard deviation, matching ``np.std``'s default)
    instead of reporting placeholder zeros that look like real results.

    In the pure-Python path an empty input yields three NaN values.
    """
    # Materialise once: the input may be a one-shot iterator or a dict view.
    data = list(iterable)

    try:
        import numpy as np
    except ImportError:
        np = None
    if np is not None:
        return [func(data) for func in (np.mean, np.median, np.std)]

    import math

    count = len(data)
    if count == 0:
        nan = float("nan")
        return [nan, nan, nan]

    mean = sum(data) / float(count)

    ordered = sorted(data)
    middle = count // 2
    if count % 2:
        median = float(ordered[middle])
    else:
        # Even number of samples: average the two central values.
        median = (ordered[middle - 1] + ordered[middle]) / 2.0

    variance = sum((value - mean) ** 2 for value in data) / count
    return [mean, median, math.sqrt(variance)]
@contextmanager
def benchmark():
    """Context manager that prints how long its ``with`` body took.

    On exit it prints ``Total time: <seconds>``.  The line is printed from a
    ``finally`` clause, so it is emitted even when the body raises; the
    exception then propagates unchanged.
    """
    # default_timer is the clock timeit itself uses for measuring intervals.
    from timeit import default_timer

    start = default_timer()
    try:
        yield
    finally:
        # Parenthesised single argument: valid as a print statement on
        # Python 2 and as a function call on Python 3.
        print("Total time: {0}".format(default_timer() - start))
if __name__ == "__main__":
    # Compare how evenly several hash functions spread UUID strings over
    # 16 buckets, and how long each one takes.
    import uuid
    from zlib import crc32
    from hashlib import md5, sha256

    def hash_to_long(digest):
        """Convert a hashlib digest object to an integer."""
        return int(digest.hexdigest(), 16)

    def identity(value):
        """Pass through results that are already integers."""
        return value

    # (hash function, converter turning its result into an integer)
    hashes = [
        (djb2, identity),
        (crc32, identity),
        (md5, hash_to_long),
        (sha256, hash_to_long),
    ]

    # Generate the inputs once, outside the timed region, so every hash sees
    # the same samples and the timing covers hashing only.  ``.hex`` replaces
    # the Python 2-only ``get_hex()``; encoding to ASCII bytes is a no-op on
    # Python 2 and is required by crc32/md5/sha256 on Python 3.
    samples = [uuid.uuid1().hex.encode("ascii") for _ in range(100000)]

    for func, convert in hashes:
        print(func.__name__)
        with benchmark():
            # Bucket index = hash modulo 16; count hits per bucket.
            histogram = Counter(convert(func(sample)) % 16 for sample in samples)
        print(histogram)
        print(stats(histogram.values()))
        # Blank separator line; print("") works on both Python 2 and 3.
        print("")
@SaveTheRbtz
Copy link
Author

This shows that djb2 is not so uniform =(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment