Skip to content

Instantly share code, notes, and snippets.

@mgilson
Created January 5, 2017 17:24
Show Gist options
  • Save mgilson/129859a79487a483163980db25b709bf to your computer and use it in GitHub Desktop.
Save mgilson/129859a79487a483163980db25b709bf to your computer and use it in GitHub Desktop.
Hash an iterable that contains hashable items as if it were a tuple.
from distutils.core import setup
from Cython.Build import cythonize
setup(
name="tuple_hash",
ext_modules=cythonize('tuple_hash.pyx'),
)
import tuple_hash
import timeit
lst = list(range(100))
def hash_cyx(lst=lst):
"""Hashes the input iterable as if it were a tuple."""
return tuple_hash.hash_iterable_as_tuple(lst)
def hash_tuple_create(lst=lst):
"""Creates a tuple, then hashes it."""
return hash(tuple(lst))
def hash_tuple(tup=tuple(lst)):
"""Hash a tuple."""
return hash(tup)
# Check correctness
for i in range(100):
lst = list(range(i))
assert hash_cyx(lst) == hash_tuple_create(lst)
# works for iterables -- not just sequences
assert tuple_hash.hash_iterable_as_tuple(iter([1, 2, 3])) == hash((1, 2, 3))
# CHECKME: does it work (well) for iterables that don't have a length_hint?
print('-' * 80)
print(timeit.timeit('hash_cyx()', 'from __main__ import hash_cyx'))
print(timeit.timeit('hash_tuple_create()', 'from __main__ import hash_tuple_create'))
print(timeit.timeit('hash_tuple()', 'from __main__ import hash_tuple'))
import operator
def hash_iterable_as_tuple(iterable):
cdef long x, y, mult
cdef long length
mult = 1000003L
x = 0x345678L
# The C implementation updates mult using the length of the tuple.
# I don't know why. When hashing a general iterable, the length isn't
# known so we use the length_hint.
length = operator.length_hint(iterable)
for item in iterable:
length -= 1
y = hash(item)
if y == -1:
return -1
x = (x ^ y) * mult
mult += <long>(82520L + length + length)
x += 97531L
if x == -1:
x = -2
return x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment