Last active
July 4, 2024 19:09
-
-
Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
hashmap.py - custom python hashmap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from typing import Any | |
# Consider using the same binary approach a `switch` statement in C/C++ uses | |
class VoidType: | |
""" | |
This is designed to mimic the `None` type as closely as possible | |
but with one key difference | |
we want `Void is None` to return False | |
This way we can fill an array with a None-like value | |
while preserving any real use-cases for `None` | |
""" | |
_instance = None | |
_memory_address = 0 | |
def __new__(cls): | |
if cls._instance is None: | |
cls._instance = super(VoidType, cls).__new__(cls) | |
cls._memory_address = id(cls._instance) | |
return cls._instance | |
def __str__(self): | |
return "Void" | |
def __repr__(self): | |
return "Void" | |
def __bool__(self): | |
return False | |
def __hash__(self): | |
return hash(self._memory_address) | |
def __eq__(self, other): | |
return ( | |
hasattr(other, "_memory_address") | |
and self._memory_address == other._memory_address | |
) | |
Void = VoidType() | |
class PrimeGenerator: | |
@staticmethod | |
def _is_prime(num): | |
if num < 2: | |
return False | |
for i in range(2, int(num**0.5) + 1): | |
if num % i == 0: | |
return False | |
return True | |
@classmethod | |
def generate_primes(cls): | |
num = 2 | |
while True: | |
if cls._is_prime(num): | |
yield num | |
num += 1 | |
@classmethod | |
def primes(cls, n=10) -> set[int]: | |
retval = set() | |
gen = cls.generate_primes() | |
while len(retval) < n: | |
retval.add(next(gen)) | |
return retval | |
class HashMap: | |
def __init__(self, size=20480): | |
self.__malloc_size = size | |
self.__chars = "0123456789abcdefghijklmnopqrstuvwxyz" | |
self.__primes = [i for i in PrimeGenerator.primes(len(self.__chars))] | |
self._keys = set() | |
self._values = [Void] * size | |
self._uuid_nums = size // 32 | |
def add(self, key: str | int, value: Any) -> None: | |
key = str(key).lower() | |
idx = self.__hash(key) | |
while idx >= len(self._values): | |
self.__malloc() | |
self._keys.add(key) | |
self._values[idx] = value | |
self.__free() | |
def delete(self, key: str | int) -> None: | |
key = str(key) | |
idx = self.__hash(key) | |
self._keys.remove(key) | |
self._values[idx] = Void | |
def values(self) -> list[Any]: | |
retval = set(self._values) | |
retval.remove(Void) | |
return list(retval) | |
def keys(self) -> list[str]: | |
return list(self._keys) | |
def __hash(self, key: str | int) -> int: | |
''' | |
This isn't really a hash function, it's more of a unique index generator... | |
''' | |
key = str(key).lower() | |
retval = 1 | |
for char in key: | |
idx = self.__chars.index(char) | |
prime = self.__primes[idx] | |
retval *= prime | |
return retval | |
def __malloc(self) -> None: | |
self._values += [Void] * self.__malloc_size | |
def __free(self) -> None: | |
""" | |
Free any over-allocated memory via removing trailing Void | |
""" | |
should_stop = False | |
for i in range(len(self._values) - 1, -1, -1): | |
if self._values[i] == Void and not should_stop: | |
self._values.pop(i) | |
else: | |
should_stop = True | |
def __setitem__(self, key: str | int, value: Any) -> None: | |
key = str(key) | |
self.add(key, value) | |
def __getitem__(self, key: str | int) -> Any: | |
key = str(key) | |
lookup_idx = self.__hash(key) | |
return self._values[lookup_idx] | |
h1 = HashMap() | |
h2 = HashMap() | |
h2["foo"] = "bar" | |
h1["foo"] = h2 | |
print(h1["foo"]) | |
print(h1["foo"]["foo"]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The hashing function is inefficient. I'm aware I should use something that already exists but I wanted to challenge myself to not rely on anything someone else has already made :)
This was just for fun anyways and should never be used in any kind of production system, because while this in theory will never produce a collision, this consumes a massive amount of memory for hashmap insertions because of the approach I used to avoid collisions :)
Now I have no reservations against reading the source code of the
dict
in Python