Skip to content

Instantly share code, notes, and snippets.

@dagss
Created June 2, 2012 20:16
Show Gist options
  • Save dagss/2859798 to your computer and use it in GitHub Desktop.
Save dagss/2859798 to your computer and use it in GitHub Desktop.
from __future__ import division
import numpy as np
import sys
def simulate(nitems, nslots, nsims=100):
k = int(np.log2(nslots))
assert 2**k == nslots
success_count = 0
for isim in range(nsims):
hashes = np.random.randint(2**32, size=nitems).astype(np.uint64)
hashes <<= 32
hashes |= np.random.randint(2**32, size=nitems)
# Try to not find collisions
m = 2**k - 1
no_collisions = False
for r1 in range(64 - k):
for r2 in range(64 - k):
subhashes = ((hashes >> r1) ^ (hashes >> r2)) & m
max_occ = np.max(np.bincount(subhashes.astype(np.uint16)))
if max_occ == 1:
# no collisions
no_collisions = True
if no_collisions:
success_count += 1
return success_count / nsims
for nitems, nslots in [(6, 8),
(8, 8),
(20, 32),
(20, 64),
(30, 64),
(30, 128),
(64, 256),
(64, 512)]:
print 'nitems=%d, nslots=%d, P(success)=%.2f' % (nitems,
nslots,
simulate(nitems, nslots))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment