Skip to content

Instantly share code, notes, and snippets.

@rasky
Last active January 21, 2018 15:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rasky/5f2a2b33b81b3673bca6934d0d1e97c0 to your computer and use it in GitHub Desktop.
Save rasky/5f2a2b33b81b3673bca6934d0d1e97c0 to your computer and use it in GitHub Desktop.
Bloom filter parameters simulation
#!/usr/bin/python3
# -*- encoding: utf-8 -*-
import math
N0 = 128 # number of 8-byte ints in intlist before we switch to bloom
E = 0.003 # desired false-positive error rate
P = 0.5 # desired fill ratio
R = 0.85 # tightening ratio
S = 2.0 # growth ratio
def log2(x):
return math.log(x) / math.log(2)
class AsciiOutput:
FMT = "%2s: %4s %5s %16s | %16s %16s | %16s %16s"
def __init__(self):
print(self.FMT % ("", "Hash", "Checks", "Partition size", "Step size", "Step count", "Tot size", "Tot count"))
print("-"*110)
@staticmethod
def intWithCommas(x):
if x < 0:
return '-' + intWithCommas(-x)
result = ''
while x >= 1000:
x, r = divmod(x, 1000)
result = ",%03d%s" % (r, result)
return "%d%s" % (x, result)
def printline(self, *args):
args = list(args)
args[3:] = [self.intWithCommas(a) for a in args[3:]]
print(self.FMT % tuple(args))
def finalize(self):
pass
out = AsciiOutput()
# Compute first N so that first M (memory size) is about double the size
# of the first intlist. Doubling memory size as we grow is an acceptable
# pattern.
E0 = E * (1-R) * 3
N0 = int(S * N0 * 64 * ((math.log(P) * math.log(1.0-P)) / abs(math.log(E0))))
cnt = 0
tot = 0
hfn = 0
for i in range(24):
e = E0 * math.pow(R, i)
n = N0 * math.pow(S, i)
k = int(math.ceil(-log2(e)))
m = n / ((math.log(P) * math.log(1.0-P)) / abs(math.log(e)))
m = int((m+7)/8)
m = (m + 63) / 64 * 64
tot += m
cnt += n
hfn += k
out.printline(i, k, hfn, int(m/k), m, n, tot, cnt)
out.finalize()
@rasky
Copy link
Author

rasky commented Feb 5, 2017

Output with default parameters:

  : Hash Checks   Partition size |        Step size       Step count |         Tot size        Tot count
--------------------------------------------------------------------------------------------------------------
 0:   10    10              204 |            2,048            1,191 |            2,048            1,191
 1:   10    20              422 |            4,224            2,382 |            6,272            3,573
 2:   11    31              785 |            8,640            4,764 |           14,912            8,337
 3:   11    42            1,600 |           17,600            9,528 |           32,512           17,865
 4:   11    53            3,275 |           36,032           19,056 |           68,544           36,921
 5:   11    64            6,690 |           73,600           38,112 |          142,144           75,033
 6:   11    75           13,672 |          150,400           76,224 |          292,544          151,257
 7:   12    87           25,600 |          307,200          152,448 |          599,744          303,705
 8:   12    99           52,277 |          627,328          304,896 |        1,227,072          608,601
 9:   12   111          106,698 |        1,280,384          609,792 |        2,507,456        1,218,393
10:   12   123          217,690 |        2,612,288        1,219,584 |        5,119,744        2,437,977
11:   13   136          409,826 |        5,327,744        2,439,168 |       10,447,488        4,877,145
12:   13   149          835,515 |       10,861,696        4,878,336 |       21,309,184        9,755,481
13:   13   162        1,702,764 |       22,135,936        9,756,672 |       43,445,120       19,512,153
14:   13   175        3,468,992 |       45,096,896       19,513,344 |       88,542,016       39,025,497
15:   14   189        6,560,278 |       91,843,904       39,026,688 |      180,385,920       78,052,185
16:   14   203       13,356,292 |      186,988,096       78,053,376 |      367,374,016      156,105,561
17:   14   217       27,184,054 |      380,576,768      156,106,752 |      747,950,784      312,212,313
18:   14   231       55,311,058 |      774,354,816      312,213,504 |    1,522,305,600      624,425,817
19:   14   245      112,508,004 |    1,575,112,064      624,427,008 |    3,097,417,664    1,248,852,825
20:   15   260      213,535,266 |    3,203,028,992    1,248,854,016 |    6,300,446,656    2,497,706,841
21:   15   275      434,111,193 |    6,511,667,904    2,497,708,032 |   12,812,114,560    4,995,414,873
22:   15   290      882,303,709 |   13,234,555,648    4,995,416,064 |   26,046,670,208    9,990,830,937
23:   15   305    1,792,770,065 |   26,891,550,976    9,990,832,128 |   52,938,221,184   19,981,663,065

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment