Skip to content

Instantly share code, notes, and snippets.

@KaceCottam
Created April 22, 2021 23:11
Show Gist options
  • Save KaceCottam/bec45f16bf3000289525a19e6f6f2e92 to your computer and use it in GitHub Desktop.
Save KaceCottam/bec45f16bf3000289525a19e6f6f2e92 to your computer and use it in GitHub Desktop.
Various rehashing tables made in C++
// Created by Kace Cottam, 4/20/2021
#include <array>
#include <list>
#include <utility> // std::pair
// std::unordered_map<Key, Value, Hash=std::hash<Key>>
template<class Key, class Value, int NumberOfBuckets = 117, class Hash = std::hash<Key>>
class ChainingHashTable {
private:
using KeyValuePair = std::pair<Key, Value>; // type alias
using Bucket = std::list<KeyValuePair>; // type alias
std::array<Bucket, NumberOfBuckets> buckets;
KeyValuePair* lookup(Bucket& b, const Key& key) {
for (KeyValuePair& pair : b) {
if (pair.first == key) {
return &pair;
}
}
return nullptr;
}
public:
void insert(const Key& k, const Value& v) {
// calculate hash
int hashed_value = abs(Hash{}(k));
// find the bucket
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets];
// lookup to see if this key is in the hashtable
KeyValuePair* kvp = lookup(chosen_bucket, k);
if (kvp == nullptr) { // if it is not,
// insert a new pair
chosen_bucket.push_front(std::make_pair(k, v));
} else { // if it is,
// change the value of that pair
kvp->second = v;
}
}
Value* get(const Key& k) {
// calculate hash
int hashed_value = abs(Hash{}(k));
// find the bucket
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets];
// lookup to see if this key is in the hashtable
KeyValuePair* kvp = lookup(chosen_bucket, k);
if (kvp == nullptr) { // if it is not,
// nothing to return
return nullptr;
} else { // if it is,
// return address of the value;
return &kvp->second;
}
}
Value& operator[](const Key& k) {
// it will return the reference to the key's
// value if it exists,
// if it does not exist, it will create a
// new KeyValuePair and return a reference
// to that value.
// we will use get first
Value* found_value = this->get(k);
if (found_value != nullptr) {
return *found_value;
}
// now we NEED to insert a new value (found_value == nullptr)
// calculate hash
int hashed_value = abs(Hash{}(k));
// find the bucket
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets];
// add new key-value-pair to front of the bucket
chosen_bucket.push_front(std::make_pair(k, Value()));
// get a reference to the first item in the bucket (the pair we just inserted)
KeyValuePair& kvp = *chosen_bucket.begin();
return kvp.second;
}
};
#include <string>
#include <cassert>
int main() {
ChainingHashTable<std::string, int> hashTable;
hashTable.insert("hello, world!", 5);
assert(*hashTable.get("hello, world!") == 5);
assert(hashTable.get("not in hashtable!") == nullptr);
assert(hashTable["hello, world!"] == 5);
hashTable.insert("hello, world!", 10);
assert(*hashTable.get("hello, world!") == 10);
assert(hashTable["hello, world!"] == 10);
hashTable["now in hashtable!"] = 399;
assert(hashTable["now in hashtable!"] == 399);
hashTable["neat"] = 403;
assert(hashTable["neat"] == 403);
hashTable["n"] = 0;
hashTable["na"] = 0;
hashTable["nb"] = 0;
hashTable["nc"] = 0;
hashTable["nca"] = 0;
hashTable["ncad"] = 0;
hashTable["ncadx"] = 0;
hashTable["ncadxx"] = 0;
hashTable["ncadxxx"] = 0;
hashTable["ncadxxxx"] = 0;
hashTable["ncadxxxxx"] = 0;
hashTable["ncadxxxxxx"] = 0;
hashTable["ncadxxxxxxx"] = 0;
hashTable["ncadxxxxxxxx"] = 0;
hashTable["ncadxxxxxxxxx"] = 0;
}
// Created by Kace Cottam, 4/22/2021
#include <memory> // std::shared_ptr
#include <utility> // std::tuple
#include <vector>
#include <stdexcept> // std::runtime_error
// std::unordered_map<Key, Value, Hash=std::hash<Key>>
template <class Key, class Value, class Hash = std::hash<Key>>
class ProbingHashTable {
private:
using LazyDeletionPair = std::tuple<Key, Value, bool>; // type alias
using OptionalLazyDeletionPair = std::shared_ptr<LazyDeletionPair>; // type alias
std::vector<OptionalLazyDeletionPair> pairs; // will start at size 11
int inserted_values = 0; // want to keep track so we tell when we hit our threshold
// for resizing
public:
ProbingHashTable() {
pairs.resize(11); // starting value from which we can resize
}
void insert(const Key& k, const Value& v) {
// insert key value pair or change a key's value
int pairs_length = pairs.size();
int hash = abs(Hash()(k)); // figure out the hash (must be postitive)
for (int i = 0; /*infinite loop*/; i++) {
int idx = (hash + i) % pairs_length;
if (pairs[idx] == nullptr) {
// nothing at this position
// create a new thing
pairs[idx] = std::make_shared<LazyDeletionPair>(k, v, false);
inserted_values += 1;
break;
} else if (std::get<2>(*pairs[idx]) == true) {
// if marked for lazy deletion
pairs[idx] = std::make_shared<LazyDeletionPair>(k, v, false);
break;
// overwrite what is there
} else if (std::get<0>(*pairs[idx]) == k) {
// modifying the existing thing
std::get<1>(*pairs[idx]) = v;
break;
}
// continue in our linear probing
}
// we want to resize our pairs vector when we find that the number of
// inserted elements is 2/3 of our max capacity
// (to prevent collisions)
const float threshold = 2.0 / 3.0; // 1/3 of the pockets are empty
const float current_ratio_of_inserted_values_to_size =
(float)inserted_values / (pairs_length + 1);
if (current_ratio_of_inserted_values_to_size >= threshold) {
// we want to resize our pairs vector
// we will need to rehash our values
std::vector<OptionalLazyDeletionPair> oldpairs = this->pairs;
this->pairs.clear();
this->pairs.resize(oldpairs.size() * 2 + 1);
this->inserted_values = 0;
for (auto pair : oldpairs) if (pair != nullptr) { // O(N)
auto key = std::move(std::get<0>(*pair));
auto value = std::move(std::get<1>(*pair));
auto deleted = std::move(std::get<2>(*pair));
if (!deleted) { // we can ignore deleted values since we are now resetting the hashTable
this->insert(key, value); // recursive call ALWAYS O(1)
// threshold was *barely* met, so since we doubled the length of pairs,
// the inserted_pairs will always be under the threshold.
// this will insert in `this->pairs`
}
}
}
}
Value* get(const Key& k) {
// insert key value pair or change a key's value
int hash = abs(Hash()(k)); // figure out the hash (must be postitive)
int pairs_length = pairs.size();
for (int i = 0; /*infinite loop*/; i++) {
int idx = (hash + i) % pairs_length;
if (pairs[idx] == nullptr) {
return nullptr;
}
else if (std::get<2>(*pairs[idx]) != true // pair has not been deleted
&& std::get<0>(*pairs[idx]) == k) // key value is what we are searching for
{
return &std::get<1>(*pairs[idx]);
}
// continue in our linear probing
}
}
Value& operator[](const Key& k) {
// it will return the reference to the key's
// value if it exists,
// if it does not exist, it will create a
// new KeyValuePair and return a reference
// to that value.
auto value = this->get(k);
if (value != nullptr) {
return *value;
}
this->insert(k, Value{}); // call default constructor for value
if (auto value = this->get(k)) {
return *value; // return value now
}
// wasn't able to create the key for some reason (shouldn't happen)
throw std::runtime_error("Probing Hash Table operator[] bad insertion");
}
void remove(const Key& k) {
int hash = abs(Hash()(k)); // figure out the hash (must be postitive)
int pairs_length = pairs.size();
for (int i = 0; pairs[(hash + i) % pairs_length] != nullptr; i++) { // while we are not at an empty pocket
if (std::get<2>(*pairs[(hash + i) % pairs_length]) != true // pair has not been deleted
&& std::get<0>(*pairs[(hash + i) % pairs_length]) == k) // key value is what we are searching for
{
std::get<2>(*pairs[(hash + i) % pairs_length]) = true;
break;
}
// continue in our linear probing
}
}
};
#include <string>
#include <cassert> // we shall do simple tests with assert
int main() {
ProbingHashTable<std::string, int> hashTable;
hashTable.insert("hello, world!", 5);
assert(*hashTable.get("hello, world!") == 5);
assert(hashTable.get("not in hashtable!") == nullptr);
assert(hashTable["hello, world!"] == 5);
hashTable.insert("hello, world!", 10);
assert(*hashTable.get("hello, world!") == 10);
assert(hashTable["hello, world!"] == 10);
hashTable["now in hashtable!"] = 399;
assert(hashTable["now in hashtable!"] == 399);
hashTable["neat"] = 403;
assert(hashTable["neat"] == 403);
hashTable["n"] = 0;
hashTable["na"] = 0;
hashTable["nb"] = 0;
hashTable["nc"] = 0;
hashTable["nca"] = 0;
hashTable["ncad"] = 0;
hashTable["ncadx"] = 0;
hashTable["ncadxx"] = 0;
hashTable["ncadxxx"] = 0;
hashTable["ncadxxxx"] = 0;
hashTable["ncadxxxxx"] = 0;
hashTable["ncadxxxxxx"] = 0;
hashTable["ncadxxxxxxx"] = 0;
hashTable["ncadxxxxxxxx"] = 0;
hashTable["ncadxxxxxxxxx"] = 0;
// we know "neat" is inside the hashTable
assert(hashTable.get("neat") != nullptr);
assert(hashTable["neat"] == 403);
// remove "neat" from hashTable
// marked it for lazy deletion
hashTable.remove("neat");
assert(hashTable.get("neat") == nullptr);
// reinsert "neat"
hashTable.insert("neat", 500);
assert(hashTable.get("neat") != nullptr);
assert(hashTable["neat"] == 500);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment