Created
January 6, 2021 19:23
-
-
Save Sangarshanan/4004b62b0a2691e351e0bfdffbcaee20 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count Min Hash.""" | |
from hashlib import md5, sha1, blake2b, sha3_512 | |
def get_row_column(value, hash_fns, m): | |
"""Fetch Row and Column.""" | |
for row, _hash_fn in enumerate(hash_fns): | |
column = ( | |
int( | |
_hash_fn( | |
bytes(value.encode("utf8") if isinstance(value, str) else value) | |
).hexdigest()[:16], | |
16, | |
) | |
% m | |
) | |
yield row, column | |
class CountMinSketch(object): | |
"""CountMinSketch.""" | |
def __init__(self, m=16): | |
"""Initialize.""" | |
self.m = m # Size of Hash Tables | |
self.hash_fns = [md5, sha1, blake2b, sha3_512] # Hash funs | |
self.n = len(self.hash_fns) # Number of Hash Funcs | |
self.table = [[0] * m for _ in range(self.n)] # Matrix | |
def add(self, value): | |
"""Add value to Hashmap.""" | |
hashtable = get_row_column(value, self.hash_fns, self.m) | |
for row, column in hashtable: | |
self.table[row][column] += 1 | |
def count(self, value): | |
"""Frequency of a value.""" | |
return min( | |
[ | |
self.table[row][column] | |
for row, column in get_row_column(value, self.hash_fns, self.m) | |
] | |
) | |
sketch = CountMinSketch() | |
for val in [1, 2, 1, 2, 1, 3, 4, 5]: | |
sketch.add(val) | |
print(sketch.count(1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment