Last active
May 2, 2021 20:14
-
-
Save michaelhelvey/915688a2b42fdb42b4b58d4fd97bbff5 to your computer and use it in GitHub Desktop.
hash map in C++ (very shit do not look)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert> | |
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <stddef.h> | |
template <typename Value> | |
class Node { | |
public: | |
Node(std::string k, Value v) : m_key(k), m_value(v), m_next(nullptr) {}; | |
std::string& key() { return m_key; }; | |
Value& value() { return m_value; }; | |
Node* next() { return m_next.get(); }; | |
Node* release_next() { return m_next.release(); }; | |
void set_next(std::unique_ptr<Node> next) | |
{ | |
m_next = std::move(next); | |
}; | |
void clear_next() | |
{ | |
m_next.reset(nullptr); | |
} | |
private: | |
std::string m_key; | |
Value m_value; | |
std::unique_ptr<Node> m_next; | |
}; | |
static const size_t default_hash_map_storage_size = 4; | |
template <class Value> | |
class MHashMap { | |
public: | |
MHashMap() : MHashMap(default_hash_map_storage_size) {}; | |
// do not allow copying | |
MHashMap(const MHashMap& other) = delete; | |
MHashMap& operator=(const MHashMap& other) = delete; | |
// do allow moving | |
MHashMap(MHashMap&& other) noexcept : MHashMap(other.m_storage_size) { | |
swap(*this, other); | |
}; | |
MHashMap& operator=(MHashMap&& other) { | |
if (this != &other) { | |
m_storage_size = other.m_storage_size; | |
free_nodes(m_storage, m_storage_size); | |
m_storage = other.m_storage; | |
} | |
return *this; | |
}; | |
Value get(std::string key) | |
{ | |
Node<Value>* current_node = get_node_for_key(key); | |
if (current_node) { | |
return current_node->value(); | |
} | |
throw "bad access"; | |
} | |
void insert(std::string key, Value value) { | |
int hash_key = hash(key); | |
Node<Value>* current_value = m_storage[hash_key]; | |
if (current_value) { | |
Node<Value>* parent_to_current = nullptr; | |
while (current_value->next() && current_value->key() != key) { | |
parent_to_current = current_value; | |
current_value = current_value->next(); | |
} | |
if (current_value->key() == key) { | |
// the user is trying to replace the key | |
auto new_node = new Node(key, value); | |
if (parent_to_current) { | |
parent_to_current->set_next(std::unique_ptr<Node<Value>>(new_node)); | |
} | |
else { | |
// we are replacing it directly in the storage list | |
m_storage[hash_key] = new_node; | |
} | |
// IF we are replacing a node that has children, we also need | |
// to replace the children of that node | |
if (current_value->next()) { | |
// if we are replacing the children, we only want to | |
// deallocate the current value, without recursively | |
// freeing children, because we've moved the children out | |
// of current_value | |
new_node->set_next(std::unique_ptr<Node<Value>>(current_value->release_next())); | |
} | |
delete current_value; | |
// NOTE: we do not update m_entries_count for a replace operation | |
} | |
else { | |
// we have a hash collision | |
auto new_node = std::make_unique<Node<Value>>(key, value); | |
current_value->set_next(std::move(new_node)); | |
m_entries_count++; | |
} | |
} | |
else { | |
// there is no current value | |
auto new_node = new Node(key, value); | |
m_storage[hash_key] = new_node; | |
m_entries_count++; | |
} | |
size_t resize_up_amount = should_resize_up(); | |
if (resize_up_amount) | |
{ | |
resize(resize_up_amount); | |
} | |
} | |
bool remove(std::string key) | |
{ | |
int hash_key = hash(key); | |
Node<Value>* current_value = m_storage[hash_key]; | |
if (current_value) { | |
Node<Value>* parent_value = nullptr; | |
while (current_value->next() && current_value->key() != key) { | |
parent_value = current_value; | |
current_value = current_value->next(); | |
} | |
if (parent_value) { | |
// current value was part of a LL, in which case we can | |
// deallocate current by clearing the parent | |
parent_value->clear_next(); | |
} | |
else { | |
// else we can clean up the top level storage | |
delete current_value; | |
m_storage[hash_key] = nullptr; | |
} | |
m_entries_count--; | |
size_t resize_down_amount = should_resize_down(); | |
if (resize_down_amount) { | |
resize(resize_down_amount); | |
} | |
return true; | |
} | |
// no current value, do nothing | |
return false; | |
} | |
~MHashMap() { | |
free_nodes(m_storage, m_storage_size); | |
} | |
private: | |
size_t m_storage_size; | |
size_t m_entries_count; | |
Node<Value>** m_storage; | |
MHashMap(size_t storage_size) : | |
m_storage_size(storage_size), | |
m_entries_count(0), | |
m_storage(new Node<Value>* [storage_size]()) {}; | |
void free_nodes(Node<Value>** storage, size_t storage_size) | |
{ | |
for (size_t i = 0; i < storage_size; ++i) { | |
delete storage[i]; | |
} | |
delete[]storage; | |
} | |
void swap(MHashMap& first, MHashMap& other) | |
{ | |
std::swap(first.m_storage, other.m_storage); | |
std::swap(first.m_storage_size, other.m_storage_size); | |
} | |
int hash(std::string key) | |
{ | |
long long hash_v = 0; | |
for (auto c : key) { | |
hash_v = ((hash_v << 5) - hash_v) + c; | |
} | |
return hash_v % m_storage_size; | |
} | |
Node<Value>* get_node_for_key(std::string key) | |
{ | |
int hash_key = hash(key); | |
Node<Value>* current_node = m_storage[hash_key]; | |
if (!current_node) { | |
return nullptr; | |
} | |
// in the case of a collision, forward to node with key | |
// potential problem: lots of string comparisons with large keys if we have a lot of collisions | |
// solution: interning, but will be expensive if we don't have a lot of collisions | |
// real solutions: just don't have collisions 4Head | |
while (current_node->next() && current_node->key() != key) { | |
current_node = current_node->next(); | |
} | |
if (current_node && current_node->key() == key) { | |
return current_node; | |
} | |
return nullptr; | |
} | |
size_t should_resize_up() | |
{ | |
// if m_entries_count is larger than 75% of our storage size, double the size | |
double resize_max_percentage = 0.75; | |
if (m_entries_count >= resize_max_percentage * m_storage_size) { | |
size_t new_size = m_storage_size * 2; | |
if (new_size > default_hash_map_storage_size) { | |
return new_size; | |
} | |
else { | |
// never allow resizing below the default (otherwise hash maps | |
// with sizes of 1 and 2 would just be resizing all the time, | |
// which is bad) | |
return 0; | |
} | |
} | |
return 0; | |
} | |
size_t should_resize_down() | |
{ | |
double resize_min_percentage = 0.25; | |
if (m_entries_count <= resize_min_percentage * m_storage_size) { | |
size_t new_size = m_storage_size / 2; | |
return new_size; | |
} | |
return 0; | |
} | |
void resize(size_t new_size) | |
{ | |
auto new_storage = new Node<Value>*[new_size](); | |
size_t old_storage_size = m_storage_size; | |
Node<Value>** old_storage = m_storage; | |
m_storage_size = new_size; | |
m_storage = new_storage; | |
for (int i = 0; i < old_storage_size; i++) { | |
Node<Value>* n = old_storage[i]; | |
if (!n) { | |
continue; | |
} | |
auto insert_into_storage_or_list = [this](Node<Value>* node) { | |
int nh = hash(node->key()); | |
Node<Value>* cn = m_storage[nh]; | |
if (cn) { | |
// A collision can mean two things: | |
// 1) The hashing algorithm collided with itself in such a | |
// way that the node we are trying to insert is effectively | |
// being moved from one linked list into another. | |
// 2) The hashing algorithm has collided with itself in an | |
// identical way to before, such that the node we are | |
// trying to insert is ALREADY a child of n (cn === n), in | |
// which case we should do nothing (identity between a node | |
// and node's next pointer will cause an infinite loop when | |
// trying to traverse the list) | |
while (cn->next() && cn->next() != node) { | |
cn = cn->next(); | |
} | |
if (cn->next() != node) { | |
cn->set_next(std::unique_ptr<Node<Value>>(node)); | |
} | |
} | |
else { | |
// no collision, so we can set directly | |
m_storage[nh] = node; | |
} | |
}; | |
// insert the node | |
insert_into_storage_or_list(n); | |
// then if the node has children, deal with them in the same way | |
while (n->next()) { | |
insert_into_storage_or_list(n->next()); | |
n = n->next(); | |
} | |
} | |
delete[] old_storage; | |
} | |
}; | |
int main() | |
{ | |
auto a = std::make_unique<Node<std::string>>("some key", "some value"); | |
MHashMap<std::string> a1; | |
a1.insert("key 1", "value 1"); | |
a1.insert("key 2", "value 2"); | |
a1.insert("key 3", "value 3"); | |
// trigger resize | |
a1.insert("key 4", "value 1234"); | |
a1.insert("key 5", "value 245"); | |
// should simply replace the key | |
a1.insert("key 1", "very different value"); | |
a1.remove("key 3"); | |
MHashMap<std::string> a2 = std::move(a1); | |
assert(a2.get("key 5") == "value 245"); | |
assert(a2.get("key 1") == "very different value"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment