Last active
August 30, 2015 12:53
-
-
Save mkoehrsen/ce931b6462f4bb7835c7 to your computer and use it in GitHub Desktop.
Generates the probability distribution describing the maximum number of leading zeroes seen in N random bit-strings, for successive values of N. This is related to the HyperLogLog cardinality estimator, as discussed at http://mkoehrsen.github.io/probability/data-analysis/2015/08/28/hyperloglog-estimator.html.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
def generate_max_zero_probabilities(maxZ): | |
maxZ += 1 | |
row1 = numpy.array(list(map(lambda x: pow(2,-(x+1)),range(maxZ)))) | |
row1_cum = row1.cumsum() - row1 # non-inclusive cumsum | |
yield (1,row1) | |
prev_row = row1 | |
prev_row_cum = row1_cum | |
n = 2 | |
while True: | |
new_row = prev_row_cum * row1 + prev_row * row1_cum + prev_row * row1 | |
yield (n,new_row) | |
prev_row = new_row | |
prev_row_cum = prev_row.cumsum() - prev_row | |
n+=1 | |
def print_table(rowgen,maxN): | |
(n,row) = next(rowgen) | |
while n < maxN: | |
print(" ".join(["%f"%x for x in row])) | |
(n,row) = next(rowgen) | |
def print_best_estimates_mle(rowgen,maxN): | |
(n,row) = next(rowgen) | |
best_rows = [(n,row) for z in range(len(row))] | |
while n < maxN: | |
(n,row) = next(rowgen) | |
for z in range(len(row)): | |
if row[z] > best_rows[z][1][z]: | |
best_rows[z] = (n,row) | |
elif n == best_rows[z][0] + 1: | |
print("z:",z,"bestn:",n-1,"probability:",best_rows[z][1][z]) | |
# This line prints the best estimate of N for each value of Z. Edit the | |
# parameters to get more values of Z or N | |
print_best_estimates_mle(generate_max_zero_probabilities(20),10000) | |
# This line prints the full probability table up to maxZ and maxN | |
#print_table(generate_max_zero_probabilities(10),100) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment