Skip to content

Instantly share code, notes, and snippets.

@Reimirno
Last active May 5, 2022 02:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Reimirno/6d339396d932952b58769ee91778ff51 to your computer and use it in GitHub Desktop.
Save Reimirno/6d339396d932952b58769ee91778ff51 to your computer and use it in GitHub Desktop.
# A implementation for a string hashset
# using a string hash function similar to Java
# using a linkedlist for collision
# removal is not yet implemented...
class StringHashSet:
def __init__(self,
initial_size = 10,\
resize_factor = 2,\
load_limit = 5,\
hash_len_limit = 10):
self.__map = [None] * initial_size
self.__resize_factor = resize_factor
self.__load_limit = load_limit
self.__hash_len_limit = hash_len_limit
self.__count = 0
def contains(self, s: str) -> bool:
idx = self.__hash(s)
list = self.__map[idx]
if list:
for element in list:
if element == s:
return True
return False
def put(self, s: str) -> bool:
if self.contains(s):
return False
self.__count += 1
if self.__demand_resize:
self.__expand_and_rehash()
self.__actually_put(s)
return True
def __actually_put(self, s: str) -> None:
idx = self.__hash(s)
if not self.__map[idx]:
self.__map[idx] = []
self.__map[idx].append(s)
# To-do: implement removal
def remove(self, s: str) -> None:
pass
def __expand_and_rehash(self):
old = self.__map
self.__map = [None] * self.__capacity * self.__resize_factor
for list in old:
if list:
for element in list:
self.__actually_put(element)
# a hash function for string
# using bit hacks for speed
def __hash(self, s: str) -> int:
rlt, limit = 0, min(len(s), self.__hash_len_limit)
for i in range(0, limit): # base-31 multiplication for every char
rlt = rlt << 5 - rlt + s[i]
if rlt < 0: # ensure hash index > 0
rlt &= 0x7fffffff # all ones bit string
return rlt % self.__capacity # ensure within bound
@property
def __demand_resize(self):
return self.__load_factor >= self.__load_limit
@property
def __load_factor(self):
return self.__count / self.__capacity
@property
def __capacity(self) -> int:
return len(self.__map)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment