Skip to content

Instantly share code, notes, and snippets.

@Sangarshanan
Created January 4, 2021 19:30
Show Gist options
  • Save Sangarshanan/f5280d1c7147916a41a8f357941a45a2 to your computer and use it in GitHub Desktop.
Save Sangarshanan/f5280d1c7147916a41a8f357941a45a2 to your computer and use it in GitHub Desktop.
"""Hyper Log Log."""
import math
from hashlib import sha1
class HyperLogLog(object):
"""HyperLogLog."""
def __init__(self, k):
"""Initialize Bucket Size."""
self.k = k
self.number_of_bits = 2 ** k
self.M = [0 for i in range(self.number_of_bits)]
def add(self, value):
"""Add an element."""
x = int(sha1(bytes(value.encode('utf8') if isinstance(value, str) else value)).hexdigest()[:16], 16)
j = x & (self.number_of_bits - 1)
w = int(x / self.number_of_bits)
self.M[j] = max(self.M[j], (64 - self.k) - w.bit_length() + 1)
def cardinality(self):
"""Return the cardinality."""
v = self.M.count(0) # number of zeros
return self.number_of_bits * math.log(self.number_of_bits / float(v))
# 16-bit HyperLogLog
h = HyperLogLog(k=16)
for val in range(10000):
h.add(val)
print(h.cardinality())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment