Skip to content

Instantly share code, notes, and snippets.

@cjauvin
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cjauvin/9652c837ef2484fb55e1 to your computer and use it in GitHub Desktop.
Save cjauvin/9652c837ef2484fb55e1 to your computer and use it in GitHub Desktop.
import random
from hashlib import md5, sha1
from collections import defaultdict
from scipy.stats import hmean
import numpy as np
import redis
import sys
def get_n_zeros(v, n_bits=128, from_right=True):
"""Get number of trailing or leading 0s in the binary
representation of v.
"""
for n, i in enumerate(range(n_bits)[::(1 if from_right else -1)]):
if v & 2 ** i:
break
else:
n += 1
return n
N = 500000
stream = np.random.randint(0, sys.maxsize, size=N)
b = 14 # number of bits for registers
m = 2 ** b # number of registers
M = defaultdict(int) # registers
from_right = True # most/least significant bits
h_func = sha1
# used if left == True
h_func_n_bits = 128 if h_func is md5 else 160
h_func_n_bits -= b
if m == 16: # b = 4
alpha = 0.673
elif m == 32: # b = 5
alpha = 0.697
elif m == 64: # b = 6
alpha = 0.709
else:
alpha = 0.7213 / (1 + 1.079 / m)
for v in stream:
x = int(h_func(str(v).encode()).hexdigest(), 16) # between 0 and 2^128 - 1
if not from_right:
mask = (m - 1) << h_func_n_bits
j = x & mask >> h_func_n_bits
assert j >= 0 and j < m
x = x & (mask ^ (m - 1)) # value part
x = x >> b
#x_bin = ('{:0%db}' % h_func_n_bits).format(x)
#nz = len(x_bin) - len(x_bin.lstrip('0'))
else:
j = x & (m - 1) # register part
assert j >= 0 and j < m
x = x >> b # value part
#x_bin = bin(x)
#nz = len(x_bin) - len(x_bin.rstrip('0'))
nz = get_n_zeros(x, n_bits=h_func_n_bits, from_right=from_right)
M[j] = max(M[j], nz + 1)
M = np.asarray(list(M.values()))
E = alpha * (m ** 2) / np.sum(2. ** -M)
#print(E)
if E <= (5 / 2 * m):
#V = len(M[M == 0]) # number of zero registers
V = m - len(M)
if V:
E = m * np.log(m / V)
elif E > (1 / 30. * (2 ** 32)):
E = -2 ** 32 * np.log(1 - E / 2 ** 32)
print(E)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment