Created
February 17, 2024 02:45
-
-
Save senderista/2698d41b421515799f0080c8e3753bfb to your computer and use it in GitHub Desktop.
simulates hashing into 8-cell bins
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import numpy | |
class BinHasher8EntriesOneChoice: | |
def __init__(self, size_exp: int) -> None: | |
# each bin is represented by a byte-size bitvector | |
self.size_exp: int = size_exp | |
self.array = numpy.zeros([2 ** (self.size_exp - 3)], dtype='uint8') | |
self.overflows: int = 0 | |
self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 3) - 1) << numpy.uint64(64 - self.size_exp) | |
""" | |
Returns True if insert was successful, False on overflow. | |
""" | |
def insert(self, num: int) -> bool: | |
# find bin using leftmost {log2(bins)} bits of value | |
bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp) | |
bin_byte: numpy.uint8 = self.array[bin_index] | |
# if bin is full then increment overflow counter and exit | |
if bin_byte == 0xff: | |
self.overflows += 1 | |
return False | |
# find rightmost unset bit in bin byte and set it | |
# (this cannot overflow since we already checked for the max byte value) | |
bin_byte = bin_byte | (bin_byte + 1) | |
self.array[bin_index] = bin_byte | |
return True | |
if __name__ == "__main__": | |
for size_exp in range(8, 27): | |
print(f"Table size: {2 ** size_exp}") | |
table = BinHasher8EntriesOneChoice(size_exp) | |
first_overflow = False | |
for i in range(1, (2 ** table.size_exp) + 1): | |
num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64) | |
if not table.insert(num) and not first_overflow: | |
first_overflow = True | |
print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})") | |
print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment