Created
January 5, 2017 17:24
-
-
Save mgilson/129859a79487a483163980db25b709bf to your computer and use it in GitHub Desktop.
Hash an iterable that contains hashable items as if it were a tuple.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from distutils.core import setup | |
from Cython.Build import cythonize | |
setup( | |
name="tuple_hash", | |
ext_modules=cythonize('tuple_hash.pyx'), | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tuple_hash | |
import timeit | |
lst = list(range(100)) | |
def hash_cyx(lst=lst): | |
"""Hashes the input iterable as if it were a tuple.""" | |
return tuple_hash.hash_iterable_as_tuple(lst) | |
def hash_tuple_create(lst=lst): | |
"""Creates a tuple, then hashes it.""" | |
return hash(tuple(lst)) | |
def hash_tuple(tup=tuple(lst)): | |
"""Hash a tuple.""" | |
return hash(tup) | |
# Check correctness | |
for i in range(100): | |
lst = list(range(i)) | |
assert hash_cyx(lst) == hash_tuple_create(lst) | |
# works for iterables -- not just sequences | |
assert tuple_hash.hash_iterable_as_tuple(iter([1, 2, 3])) == hash((1, 2, 3)) | |
# CHECKME: does it work (well) for iterables that don't have a length_hint? | |
print('-' * 80) | |
print(timeit.timeit('hash_cyx()', 'from __main__ import hash_cyx')) | |
print(timeit.timeit('hash_tuple_create()', 'from __main__ import hash_tuple_create')) | |
print(timeit.timeit('hash_tuple()', 'from __main__ import hash_tuple')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import operator | |
def hash_iterable_as_tuple(iterable): | |
cdef long x, y, mult | |
cdef long length | |
mult = 1000003L | |
x = 0x345678L | |
# The C implementation updates mult using the length of the tuple. | |
# I don't know why. When hashing a general iterable, the length isn't | |
# known so we use the length_hint. | |
length = operator.length_hint(iterable) | |
for item in iterable: | |
length -= 1 | |
y = hash(item) | |
if y == -1: | |
return -1 | |
x = (x ^ y) * mult | |
mult += <long>(82520L + length + length) | |
x += 97531L | |
if x == -1: | |
x = -2 | |
return x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment