Skip to content

Instantly share code, notes, and snippets.

@Sangarshanan
Created January 6, 2021 19:23
Show Gist options
  • Save Sangarshanan/4004b62b0a2691e351e0bfdffbcaee20 to your computer and use it in GitHub Desktop.
Save Sangarshanan/4004b62b0a2691e351e0bfdffbcaee20 to your computer and use it in GitHub Desktop.
"""Count Min Hash."""
from hashlib import md5, sha1, blake2b, sha3_512
def get_row_column(value, hash_fns, m):
"""Fetch Row and Column."""
for row, _hash_fn in enumerate(hash_fns):
column = (
int(
_hash_fn(
bytes(value.encode("utf8") if isinstance(value, str) else value)
).hexdigest()[:16],
16,
)
% m
)
yield row, column
class CountMinSketch(object):
"""CountMinSketch."""
def __init__(self, m=16):
"""Initialize."""
self.m = m # Size of Hash Tables
self.hash_fns = [md5, sha1, blake2b, sha3_512] # Hash funs
self.n = len(self.hash_fns) # Number of Hash Funcs
self.table = [[0] * m for _ in range(self.n)] # Matrix
def add(self, value):
"""Add value to Hashmap."""
hashtable = get_row_column(value, self.hash_fns, self.m)
for row, column in hashtable:
self.table[row][column] += 1
def count(self, value):
"""Frequency of a value."""
return min(
[
self.table[row][column]
for row, column in get_row_column(value, self.hash_fns, self.m)
]
)
sketch = CountMinSketch()
for val in [1, 2, 1, 2, 1, 3, 4, 5]:
sketch.add(val)
print(sketch.count(1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment