Skip to content

Instantly share code, notes, and snippets.

@michaelhelvey
Last active May 2, 2021 20:14
Show Gist options
  • Save michaelhelvey/915688a2b42fdb42b4b58d4fd97bbff5 to your computer and use it in GitHub Desktop.
Save michaelhelvey/915688a2b42fdb42b4b58d4fd97bbff5 to your computer and use it in GitHub Desktop.
hash map in C++ (very shit do not look)
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
#include <stddef.h>
template <typename Value>
class Node {
public:
Node(std::string k, Value v) : m_key(k), m_value(v), m_next(nullptr) {};
std::string& key() { return m_key; };
Value& value() { return m_value; };
Node* next() { return m_next.get(); };
Node* release_next() { return m_next.release(); };
void set_next(std::unique_ptr<Node> next)
{
m_next = std::move(next);
};
void clear_next()
{
m_next.reset(nullptr);
}
private:
std::string m_key;
Value m_value;
std::unique_ptr<Node> m_next;
};
static const size_t default_hash_map_storage_size = 4;
template <class Value>
class MHashMap {
public:
MHashMap() : MHashMap(default_hash_map_storage_size) {};
// do not allow copying
MHashMap(const MHashMap& other) = delete;
MHashMap& operator=(const MHashMap& other) = delete;
// do allow moving
MHashMap(MHashMap&& other) noexcept : MHashMap(other.m_storage_size) {
swap(*this, other);
};
MHashMap& operator=(MHashMap&& other) {
if (this != &other) {
m_storage_size = other.m_storage_size;
free_nodes(m_storage, m_storage_size);
m_storage = other.m_storage;
}
return *this;
};
Value get(std::string key)
{
Node<Value>* current_node = get_node_for_key(key);
if (current_node) {
return current_node->value();
}
throw "bad access";
}
void insert(std::string key, Value value) {
int hash_key = hash(key);
Node<Value>* current_value = m_storage[hash_key];
if (current_value) {
Node<Value>* parent_to_current = nullptr;
while (current_value->next() && current_value->key() != key) {
parent_to_current = current_value;
current_value = current_value->next();
}
if (current_value->key() == key) {
// the user is trying to replace the key
auto new_node = new Node(key, value);
if (parent_to_current) {
parent_to_current->set_next(std::unique_ptr<Node<Value>>(new_node));
}
else {
// we are replacing it directly in the storage list
m_storage[hash_key] = new_node;
}
// IF we are replacing a node that has children, we also need
// to replace the children of that node
if (current_value->next()) {
// if we are replacing the children, we only want to
// deallocate the current value, without recursively
// freeing children, because we've moved the children out
// of current_value
new_node->set_next(std::unique_ptr<Node<Value>>(current_value->release_next()));
}
delete current_value;
// NOTE: we do not update m_entries_count for a replace operation
}
else {
// we have a hash collision
auto new_node = std::make_unique<Node<Value>>(key, value);
current_value->set_next(std::move(new_node));
m_entries_count++;
}
}
else {
// there is no current value
auto new_node = new Node(key, value);
m_storage[hash_key] = new_node;
m_entries_count++;
}
size_t resize_up_amount = should_resize_up();
if (resize_up_amount)
{
resize(resize_up_amount);
}
}
bool remove(std::string key)
{
int hash_key = hash(key);
Node<Value>* current_value = m_storage[hash_key];
if (current_value) {
Node<Value>* parent_value = nullptr;
while (current_value->next() && current_value->key() != key) {
parent_value = current_value;
current_value = current_value->next();
}
if (parent_value) {
// current value was part of a LL, in which case we can
// deallocate current by clearing the parent
parent_value->clear_next();
}
else {
// else we can clean up the top level storage
delete current_value;
m_storage[hash_key] = nullptr;
}
m_entries_count--;
size_t resize_down_amount = should_resize_down();
if (resize_down_amount) {
resize(resize_down_amount);
}
return true;
}
// no current value, do nothing
return false;
}
~MHashMap() {
free_nodes(m_storage, m_storage_size);
}
private:
size_t m_storage_size;
size_t m_entries_count;
Node<Value>** m_storage;
MHashMap(size_t storage_size) :
m_storage_size(storage_size),
m_entries_count(0),
m_storage(new Node<Value>* [storage_size]()) {};
void free_nodes(Node<Value>** storage, size_t storage_size)
{
for (size_t i = 0; i < storage_size; ++i) {
delete storage[i];
}
delete[]storage;
}
void swap(MHashMap& first, MHashMap& other)
{
std::swap(first.m_storage, other.m_storage);
std::swap(first.m_storage_size, other.m_storage_size);
}
int hash(std::string key)
{
long long hash_v = 0;
for (auto c : key) {
hash_v = ((hash_v << 5) - hash_v) + c;
}
return hash_v % m_storage_size;
}
Node<Value>* get_node_for_key(std::string key)
{
int hash_key = hash(key);
Node<Value>* current_node = m_storage[hash_key];
if (!current_node) {
return nullptr;
}
// in the case of a collision, forward to node with key
// potential problem: lots of string comparisons with large keys if we have a lot of collisions
// solution: interning, but will be expensive if we don't have a lot of collisions
// real solutions: just don't have collisions 4Head
while (current_node->next() && current_node->key() != key) {
current_node = current_node->next();
}
if (current_node && current_node->key() == key) {
return current_node;
}
return nullptr;
}
size_t should_resize_up()
{
// if m_entries_count is larger than 75% of our storage size, double the size
double resize_max_percentage = 0.75;
if (m_entries_count >= resize_max_percentage * m_storage_size) {
size_t new_size = m_storage_size * 2;
if (new_size > default_hash_map_storage_size) {
return new_size;
}
else {
// never allow resizing below the default (otherwise hash maps
// with sizes of 1 and 2 would just be resizing all the time,
// which is bad)
return 0;
}
}
return 0;
}
size_t should_resize_down()
{
double resize_min_percentage = 0.25;
if (m_entries_count <= resize_min_percentage * m_storage_size) {
size_t new_size = m_storage_size / 2;
return new_size;
}
return 0;
}
void resize(size_t new_size)
{
auto new_storage = new Node<Value>*[new_size]();
size_t old_storage_size = m_storage_size;
Node<Value>** old_storage = m_storage;
m_storage_size = new_size;
m_storage = new_storage;
for (int i = 0; i < old_storage_size; i++) {
Node<Value>* n = old_storage[i];
if (!n) {
continue;
}
auto insert_into_storage_or_list = [this](Node<Value>* node) {
int nh = hash(node->key());
Node<Value>* cn = m_storage[nh];
if (cn) {
// A collision can mean two things:
// 1) The hashing algorithm collided with itself in such a
// way that the node we are trying to insert is effectively
// being moved from one linked list into another.
// 2) The hashing algorithm has collided with itself in an
// identical way to before, such that the node we are
// trying to insert is ALREADY a child of n (cn === n), in
// which case we should do nothing (identity between a node
// and node's next pointer will cause an infinite loop when
// trying to traverse the list)
while (cn->next() && cn->next() != node) {
cn = cn->next();
}
if (cn->next() != node) {
cn->set_next(std::unique_ptr<Node<Value>>(node));
}
}
else {
// no collision, so we can set directly
m_storage[nh] = node;
}
};
// insert the node
insert_into_storage_or_list(n);
// then if the node has children, deal with them in the same way
while (n->next()) {
insert_into_storage_or_list(n->next());
n = n->next();
}
}
delete[] old_storage;
}
};
int main()
{
auto a = std::make_unique<Node<std::string>>("some key", "some value");
MHashMap<std::string> a1;
a1.insert("key 1", "value 1");
a1.insert("key 2", "value 2");
a1.insert("key 3", "value 3");
// trigger resize
a1.insert("key 4", "value 1234");
a1.insert("key 5", "value 245");
// should simply replace the key
a1.insert("key 1", "very different value");
a1.remove("key 3");
MHashMap<std::string> a2 = std::move(a1);
assert(a2.get("key 5") == "value 245");
assert(a2.get("key 1") == "very different value");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment