Skip to content

Instantly share code, notes, and snippets.

@senderista
Created February 17, 2024 02:45
Show Gist options
  • Save senderista/2698d41b421515799f0080c8e3753bfb to your computer and use it in GitHub Desktop.
Save senderista/2698d41b421515799f0080c8e3753bfb to your computer and use it in GitHub Desktop.
simulates hashing into 8-cell bins
#!/usr/bin/env python3
import numpy
class BinHasher8EntriesOneChoice:
def __init__(self, size_exp: int) -> None:
# each bin is represented by a byte-size bitvector
self.size_exp: int = size_exp
self.array = numpy.zeros([2 ** (self.size_exp - 3)], dtype='uint8')
self.overflows: int = 0
self.bin_mask: numpy.uint64 = numpy.uint64(2 ** (self.size_exp - 3) - 1) << numpy.uint64(64 - self.size_exp)
"""
Returns True if insert was successful, False on overflow.
"""
def insert(self, num: int) -> bool:
# find bin using leftmost {log2(bins)} bits of value
bin_index: int = numpy.uint64(num & self.bin_mask) >> numpy.uint64(64 - self.size_exp)
bin_byte: numpy.uint8 = self.array[bin_index]
# if bin is full then increment overflow counter and exit
if bin_byte == 0xff:
self.overflows += 1
return False
# find rightmost unset bit in bin byte and set it
# (this cannot overflow since we already checked for the max byte value)
bin_byte = bin_byte | (bin_byte + 1)
self.array[bin_index] = bin_byte
return True
if __name__ == "__main__":
for size_exp in range(8, 27):
print(f"Table size: {2 ** size_exp}")
table = BinHasher8EntriesOneChoice(size_exp)
first_overflow = False
for i in range(1, (2 ** table.size_exp) + 1):
num: int = numpy.random.randint(1, high=numpy.iinfo(numpy.uint64).max, dtype=numpy.uint64)
if not table.insert(num) and not first_overflow:
first_overflow = True
print(f"Overflowed at {i} elements (fraction: {i / (2 ** table.size_exp)})")
print(f"Overflows for {2 ** table.size_exp} elements: {table.overflows} (fraction: {table.overflows / (2 ** table.size_exp)})")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment