Created
April 22, 2021 23:11
-
-
Save KaceCottam/bec45f16bf3000289525a19e6f6f2e92 to your computer and use it in GitHub Desktop.
Various rehashing tables made in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Created by Kace Cottam, 4/20/2021 | |
#include <array> | |
#include <list> | |
#include <utility> // std::pair | |
// std::unordered_map<Key, Value, Hash=std::hash<Key>> | |
template<class Key, class Value, int NumberOfBuckets = 117, class Hash = std::hash<Key>> | |
class ChainingHashTable { | |
private: | |
using KeyValuePair = std::pair<Key, Value>; // type alias | |
using Bucket = std::list<KeyValuePair>; // type alias | |
std::array<Bucket, NumberOfBuckets> buckets; | |
KeyValuePair* lookup(Bucket& b, const Key& key) { | |
for (KeyValuePair& pair : b) { | |
if (pair.first == key) { | |
return &pair; | |
} | |
} | |
return nullptr; | |
} | |
public: | |
void insert(const Key& k, const Value& v) { | |
// calculate hash | |
int hashed_value = abs(Hash{}(k)); | |
// find the bucket | |
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets]; | |
// lookup to see if this key is in the hashtable | |
KeyValuePair* kvp = lookup(chosen_bucket, k); | |
if (kvp == nullptr) { // if it is not, | |
// insert a new pair | |
chosen_bucket.push_front(std::make_pair(k, v)); | |
} else { // if it is, | |
// change the value of that pair | |
kvp->second = v; | |
} | |
} | |
Value* get(const Key& k) { | |
// calculate hash | |
int hashed_value = abs(Hash{}(k)); | |
// find the bucket | |
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets]; | |
// lookup to see if this key is in the hashtable | |
KeyValuePair* kvp = lookup(chosen_bucket, k); | |
if (kvp == nullptr) { // if it is not, | |
// nothing to return | |
return nullptr; | |
} else { // if it is, | |
// return address of the value; | |
return &kvp->second; | |
} | |
} | |
Value& operator[](const Key& k) { | |
// it will return the reference to the key's | |
// value if it exists, | |
// if it does not exist, it will create a | |
// new KeyValuePair and return a reference | |
// to that value. | |
// we will use get first | |
Value* found_value = this->get(k); | |
if (found_value != nullptr) { | |
return *found_value; | |
} | |
// now we NEED to insert a new value (found_value == nullptr) | |
// calculate hash | |
int hashed_value = abs(Hash{}(k)); | |
// find the bucket | |
Bucket& chosen_bucket = buckets[hashed_value % NumberOfBuckets]; | |
// add new key-value-pair to front of the bucket | |
chosen_bucket.push_front(std::make_pair(k, Value())); | |
// get a reference to the first item in the bucket (the pair we just inserted) | |
KeyValuePair& kvp = *chosen_bucket.begin(); | |
return kvp.second; | |
} | |
}; | |
#include <string> | |
#include <cassert> | |
int main() { | |
ChainingHashTable<std::string, int> hashTable; | |
hashTable.insert("hello, world!", 5); | |
assert(*hashTable.get("hello, world!") == 5); | |
assert(hashTable.get("not in hashtable!") == nullptr); | |
assert(hashTable["hello, world!"] == 5); | |
hashTable.insert("hello, world!", 10); | |
assert(*hashTable.get("hello, world!") == 10); | |
assert(hashTable["hello, world!"] == 10); | |
hashTable["now in hashtable!"] = 399; | |
assert(hashTable["now in hashtable!"] == 399); | |
hashTable["neat"] = 403; | |
assert(hashTable["neat"] == 403); | |
hashTable["n"] = 0; | |
hashTable["na"] = 0; | |
hashTable["nb"] = 0; | |
hashTable["nc"] = 0; | |
hashTable["nca"] = 0; | |
hashTable["ncad"] = 0; | |
hashTable["ncadx"] = 0; | |
hashTable["ncadxx"] = 0; | |
hashTable["ncadxxx"] = 0; | |
hashTable["ncadxxxx"] = 0; | |
hashTable["ncadxxxxx"] = 0; | |
hashTable["ncadxxxxxx"] = 0; | |
hashTable["ncadxxxxxxx"] = 0; | |
hashTable["ncadxxxxxxxx"] = 0; | |
hashTable["ncadxxxxxxxxx"] = 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Created by Kace Cottam, 4/22/2021 | |
#include <memory> // std::shared_ptr | |
#include <utility> // std::tuple | |
#include <vector> | |
#include <stdexcept> // std::runtime_error | |
// std::unordered_map<Key, Value, Hash=std::hash<Key>> | |
template <class Key, class Value, class Hash = std::hash<Key>> | |
class ProbingHashTable { | |
private: | |
using LazyDeletionPair = std::tuple<Key, Value, bool>; // type alias | |
using OptionalLazyDeletionPair = std::shared_ptr<LazyDeletionPair>; // type alias | |
std::vector<OptionalLazyDeletionPair> pairs; // will start at size 11 | |
int inserted_values = 0; // want to keep track so we tell when we hit our threshold | |
// for resizing | |
public: | |
ProbingHashTable() { | |
pairs.resize(11); // starting value from which we can resize | |
} | |
void insert(const Key& k, const Value& v) { | |
// insert key value pair or change a key's value | |
int pairs_length = pairs.size(); | |
int hash = abs(Hash()(k)); // figure out the hash (must be postitive) | |
for (int i = 0; /*infinite loop*/; i++) { | |
int idx = (hash + i) % pairs_length; | |
if (pairs[idx] == nullptr) { | |
// nothing at this position | |
// create a new thing | |
pairs[idx] = std::make_shared<LazyDeletionPair>(k, v, false); | |
inserted_values += 1; | |
break; | |
} else if (std::get<2>(*pairs[idx]) == true) { | |
// if marked for lazy deletion | |
pairs[idx] = std::make_shared<LazyDeletionPair>(k, v, false); | |
break; | |
// overwrite what is there | |
} else if (std::get<0>(*pairs[idx]) == k) { | |
// modifying the existing thing | |
std::get<1>(*pairs[idx]) = v; | |
break; | |
} | |
// continue in our linear probing | |
} | |
// we want to resize our pairs vector when we find that the number of | |
// inserted elements is 2/3 of our max capacity | |
// (to prevent collisions) | |
const float threshold = 2.0 / 3.0; // 1/3 of the pockets are empty | |
const float current_ratio_of_inserted_values_to_size = | |
(float)inserted_values / (pairs_length + 1); | |
if (current_ratio_of_inserted_values_to_size >= threshold) { | |
// we want to resize our pairs vector | |
// we will need to rehash our values | |
std::vector<OptionalLazyDeletionPair> oldpairs = this->pairs; | |
this->pairs.clear(); | |
this->pairs.resize(oldpairs.size() * 2 + 1); | |
this->inserted_values = 0; | |
for (auto pair : oldpairs) if (pair != nullptr) { // O(N) | |
auto key = std::move(std::get<0>(*pair)); | |
auto value = std::move(std::get<1>(*pair)); | |
auto deleted = std::move(std::get<2>(*pair)); | |
if (!deleted) { // we can ignore deleted values since we are now resetting the hashTable | |
this->insert(key, value); // recursive call ALWAYS O(1) | |
// threshold was *barely* met, so since we doubled the length of pairs, | |
// the inserted_pairs will always be under the threshold. | |
// this will insert in `this->pairs` | |
} | |
} | |
} | |
} | |
Value* get(const Key& k) { | |
// insert key value pair or change a key's value | |
int hash = abs(Hash()(k)); // figure out the hash (must be postitive) | |
int pairs_length = pairs.size(); | |
for (int i = 0; /*infinite loop*/; i++) { | |
int idx = (hash + i) % pairs_length; | |
if (pairs[idx] == nullptr) { | |
return nullptr; | |
} | |
else if (std::get<2>(*pairs[idx]) != true // pair has not been deleted | |
&& std::get<0>(*pairs[idx]) == k) // key value is what we are searching for | |
{ | |
return &std::get<1>(*pairs[idx]); | |
} | |
// continue in our linear probing | |
} | |
} | |
Value& operator[](const Key& k) { | |
// it will return the reference to the key's | |
// value if it exists, | |
// if it does not exist, it will create a | |
// new KeyValuePair and return a reference | |
// to that value. | |
auto value = this->get(k); | |
if (value != nullptr) { | |
return *value; | |
} | |
this->insert(k, Value{}); // call default constructor for value | |
if (auto value = this->get(k)) { | |
return *value; // return value now | |
} | |
// wasn't able to create the key for some reason (shouldn't happen) | |
throw std::runtime_error("Probing Hash Table operator[] bad insertion"); | |
} | |
void remove(const Key& k) { | |
int hash = abs(Hash()(k)); // figure out the hash (must be postitive) | |
int pairs_length = pairs.size(); | |
for (int i = 0; pairs[(hash + i) % pairs_length] != nullptr; i++) { // while we are not at an empty pocket | |
if (std::get<2>(*pairs[(hash + i) % pairs_length]) != true // pair has not been deleted | |
&& std::get<0>(*pairs[(hash + i) % pairs_length]) == k) // key value is what we are searching for | |
{ | |
std::get<2>(*pairs[(hash + i) % pairs_length]) = true; | |
break; | |
} | |
// continue in our linear probing | |
} | |
} | |
}; | |
#include <string> | |
#include <cassert> // we shall do simple tests with assert | |
int main() { | |
ProbingHashTable<std::string, int> hashTable; | |
hashTable.insert("hello, world!", 5); | |
assert(*hashTable.get("hello, world!") == 5); | |
assert(hashTable.get("not in hashtable!") == nullptr); | |
assert(hashTable["hello, world!"] == 5); | |
hashTable.insert("hello, world!", 10); | |
assert(*hashTable.get("hello, world!") == 10); | |
assert(hashTable["hello, world!"] == 10); | |
hashTable["now in hashtable!"] = 399; | |
assert(hashTable["now in hashtable!"] == 399); | |
hashTable["neat"] = 403; | |
assert(hashTable["neat"] == 403); | |
hashTable["n"] = 0; | |
hashTable["na"] = 0; | |
hashTable["nb"] = 0; | |
hashTable["nc"] = 0; | |
hashTable["nca"] = 0; | |
hashTable["ncad"] = 0; | |
hashTable["ncadx"] = 0; | |
hashTable["ncadxx"] = 0; | |
hashTable["ncadxxx"] = 0; | |
hashTable["ncadxxxx"] = 0; | |
hashTable["ncadxxxxx"] = 0; | |
hashTable["ncadxxxxxx"] = 0; | |
hashTable["ncadxxxxxxx"] = 0; | |
hashTable["ncadxxxxxxxx"] = 0; | |
hashTable["ncadxxxxxxxxx"] = 0; | |
// we know "neat" is inside the hashTable | |
assert(hashTable.get("neat") != nullptr); | |
assert(hashTable["neat"] == 403); | |
// remove "neat" from hashTable | |
// marked it for lazy deletion | |
hashTable.remove("neat"); | |
assert(hashTable.get("neat") == nullptr); | |
// reinsert "neat" | |
hashTable.insert("neat", 500); | |
assert(hashTable.get("neat") != nullptr); | |
assert(hashTable["neat"] == 500); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment