Skip to content

Instantly share code, notes, and snippets.

@innateessence
Last active July 4, 2024 19:09
Show Gist options
  • Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
hashmap.py - custom python hashmap
#!/usr/bin/env python3
from typing import Any
# Consider using the same binary approach a `switch` statement in C/C++ uses
class VoidType:
"""
This is designed to mimic the `None` type as closely as possible
but with one key difference
we want `Void is None` to return False
This way we can fill an array with a None-like value
while preserving any real use-cases for `None`
"""
_instance = None
_memory_address = 0
def __new__(cls):
if cls._instance is None:
cls._instance = super(VoidType, cls).__new__(cls)
cls._memory_address = id(cls._instance)
return cls._instance
def __str__(self):
return "Void"
def __repr__(self):
return "Void"
def __bool__(self):
return False
def __hash__(self):
return hash(self._memory_address)
def __eq__(self, other):
return (
hasattr(other, "_memory_address")
and self._memory_address == other._memory_address
)
Void = VoidType()
class PrimeGenerator:
@staticmethod
def _is_prime(num):
if num < 2:
return False
for i in range(2, int(num**0.5) + 1):
if num % i == 0:
return False
return True
@classmethod
def generate_primes(cls):
num = 2
while True:
if cls._is_prime(num):
yield num
num += 1
@classmethod
def primes(cls, n=10) -> set[int]:
retval = set()
gen = cls.generate_primes()
while len(retval) < n:
retval.add(next(gen))
return retval
class HashMap:
def __init__(self, size=20480):
self.__malloc_size = size
self.__chars = "0123456789abcdefghijklmnopqrstuvwxyz"
self.__primes = [i for i in PrimeGenerator.primes(len(self.__chars))]
self._keys = set()
self._values = [Void] * size
self._uuid_nums = size // 32
def add(self, key: str | int, value: Any) -> None:
key = str(key).lower()
idx = self.__hash(key)
while idx >= len(self._values):
self.__malloc()
self._keys.add(key)
self._values[idx] = value
self.__free()
def delete(self, key: str | int) -> None:
key = str(key)
idx = self.__hash(key)
self._keys.remove(key)
self._values[idx] = Void
def values(self) -> list[Any]:
retval = set(self._values)
retval.remove(Void)
return list(retval)
def keys(self) -> list[str]:
return list(self._keys)
def __hash(self, key: str | int) -> int:
'''
This isn't really a hash function, it's more of a unique index generator...
'''
key = str(key).lower()
retval = 1
for char in key:
idx = self.__chars.index(char)
prime = self.__primes[idx]
retval *= prime
return retval
def __malloc(self) -> None:
self._values += [Void] * self.__malloc_size
def __free(self) -> None:
"""
Free any over-allocated memory via removing trailing Void
"""
should_stop = False
for i in range(len(self._values) - 1, -1, -1):
if self._values[i] == Void and not should_stop:
self._values.pop(i)
else:
should_stop = True
def __setitem__(self, key: str | int, value: Any) -> None:
key = str(key)
self.add(key, value)
def __getitem__(self, key: str | int) -> Any:
key = str(key)
lookup_idx = self.__hash(key)
return self._values[lookup_idx]
h1 = HashMap()
h2 = HashMap()
h2["foo"] = "bar"
h1["foo"] = h2
print(h1["foo"])
print(h1["foo"]["foo"])
@innateessence
Copy link
Author

innateessence commented Jul 3, 2024

The hashing function is inefficient. I'm aware I should use something that already exists but I wanted to challenge myself to not rely on anything someone else has already made :)

This was just for fun anyways and should never be used in any kind of production system, because while this in theory will never produce a collision, this consumes a massive amount of memory for hashmap insertions because of the approach I used to avoid collisions :)

Now I have no reservations against reading the source code of the dict in Python

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment