Skip to content

Instantly share code, notes, and snippets.

@jdp
Created April 5, 2013 06:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdp/5317075 to your computer and use it in GitHub Desktop.
Save jdp/5317075 to your computer and use it in GitHub Desktop.
example LogLog and HyperLogLog cardinality estimators
import math
import struct
def rank(num):
if num == 0:
return 32
p = 0
while (num >> p) & 1 == 0:
p += 1
return p
def lsbp32(v):
bp = [0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
return bp[(0xffffffff & ((v & -v) * 0x077cb531)) >> 27]
class LogLogEstimator(object):
def __init__(self, k):
self.k = k
self.m = 2 ** self.k
self.buckets = [0] * self.m
@property
def alpha(self):
# alpha(m) = 1 / (gamma(-1 / m) * (2**(-1 / m) - 1) / log(2))**m
return {16: 0.75207,
32: 0.77308,
64: 0.78356}.get(self.m, 0.79402)
def update(self, values):
for v in values:
h = hash(v)
b = h & (self.m - 1)
self.buckets[b] = max(self.buckets[b], rank(h >> self.k))
def get(self):
return self.alpha * self.m * 2 ** (float(sum(self.buckets)) / self.m)
class HyperLogLogEstimator(object):
def __init__(self, b):
self.b = b
self.m = 2 ** self.b
self.buckets = [0] * self.m
@property
def alpha(self):
return {16: 0.673,
32: 0.697,
64: 0.709}.get(self.m, 0.7213 / (1 + 1.079 / self.m))
def rho(self, val):
return min(2 ** self.b - 1, lsbp32(val) + 1)
def update(self, values):
for v in values:
h = hash(v) & 0xffffffff
i = h & ((1 << self.b) - 1)
self.buckets[i] = max(self.buckets[i], self.rho(h >> self.b))
def get(self):
est = self.alpha * self.m**2
est *= 1 / sum([2**-self.buckets[i] for i in range(self.m)])
if est < 5 / 2 * self.m:
v = self.buckets.count(0)
if v == 0:
dv = est
else:
dv = self.m * math.log(self.m / v)
elif est <= (1 / 30 * 2**32):
dv = est
elif est > (1 / 30 * 2**32):
dv = -2**32 * math.log(1 - est / 2**32)
return dv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment