Create a gist now

Instantly share code, notes, and snippets.

Proposed Python hashdos fix
#copied from pypy to make it convenient to run the code
def intmask(n):
if isinstance(n, int):
return int(n) # possibly bool->int
assert not isinstance(n, float)
n = long(n)
n &= 18446744073709551615
if n >= 9223372036854775808:
n -= 2*9223372036854775808
return int(n)
#Original PyPY version
#--------------------------------------
def _hash_string(s):
"""The algorithm behind compute_hash() for a string or a unicode."""
length = len(s)
if length == 0:
return -1
x = ord(s[0]) << 7
i = 0
while i < length:
x = intmask((1000003*x) ^ ord(s[i]))
i += 1
x ^= length
return intmask(x)
#Proposed replacement
#--------------------------------------
import os, array
size_exponent = 14 #adjust as a memory/security tradeoff
r = array.array('l', os.urandom(2**size_exponent))
len_r = len(r)
def _hash_string2(s):
"""The algorithm behind compute_hash() for a string or a unicode."""
length = len(s)
#print s
if length == 0:
return -1
x = (ord(s[0]) << 7) ^ r[length % len_r]
i = 0
while i < length:
x = intmask((1000003*x) ^ ord(s[i]))
x ^= r[x % len_r]
i += 1
x ^= length
return intmask(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment