Created
December 9, 2016 16:12
-
-
Save matklad/3fa22b8a2550c5e2c338f05ecc7bdea4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <cassert> | |
#include <cstddef> | |
#include <cstdint> | |
#include <cstdlib> | |
#include <ctime> | |
#include <forward_list> | |
#include <iostream> | |
#include <list> | |
#include <unordered_set> | |
#include <vector> | |
// Better: power of two and a proper hash function. | |
const size_t capacity = 1'000'007; // Yay C++14!! | |
const double load_factor = 0.9; | |
struct std_us { | |
std_us() : data_() { data_.reserve(capacity); } | |
void add(uint32_t x) { data_.insert(x); } | |
bool contains(uint32_t x) { return data_.find(x) != end(data_); } | |
private: | |
std::unordered_set<uint32_t> data_; | |
}; | |
struct chaining { | |
chaining() : data_(capacity, std::forward_list<uint32_t>()) {} | |
void add(uint32_t x) { | |
size_t idx = hash(x); | |
// duplicates :( | |
data_[idx].push_front(x); | |
} | |
bool contains(uint32_t x) { | |
auto const &bucket = data_[hash(x)]; | |
return std::find(begin(bucket), end(bucket), x) != end(bucket); | |
} | |
private: | |
// forward_list -> vector = 3x as much space. | |
// Can steal one element though... | |
std::vector<std::forward_list<uint32_t>> data_; | |
size_t hash(uint32_t x) { return x % capacity; } | |
}; | |
struct chaining2h { | |
chaining2h() : data_(capacity, std::list<uint32_t>()) {} | |
void add(uint32_t x) { | |
auto &bucket1 = data_[hash1(x)]; | |
auto &bucket2 = data_[hash1(x)]; | |
(bucket1.size() < bucket2.size() ? bucket1 : bucket2).push_front(x); | |
} | |
bool contains(uint32_t x) { | |
return bucket_contains(data_[hash1(x)], x) || | |
bucket_contains(data_[hash2(x)], x); | |
} | |
private: | |
bool bucket_contains(std::list<uint32_t> const &bucket, uint32_t x) { | |
return std::find(begin(bucket), end(bucket), x) != end(bucket); | |
} | |
std::vector<std::list<uint32_t>> data_; | |
size_t hash1(uint32_t x) { return x % capacity; } | |
size_t hash2(uint32_t x) { return (x * 15485863) % capacity; } | |
}; | |
struct linear_probing { | |
linear_probing() : data_(capacity, EMPTY) {} | |
void add(uint32_t x) { data_[insertion_index(x)] = x; } | |
bool contains(uint32_t x) { return data_[insertion_index(x)] == x; } | |
private: | |
const uint32_t EMPTY = 0; | |
const uint32_t TOMBSTONE = 1; | |
std::vector<uint32_t> data_; | |
size_t insertion_index(uint32_t x) { | |
assert(x != EMPTY && x != TOMBSTONE); // :( | |
auto index = hash(x); | |
while (data_[index] != 0 && data_[index] != x) { | |
index = (index + 1) % capacity; | |
} | |
return index; | |
} | |
size_t hash(uint32_t x) { return x % capacity; } | |
}; | |
struct robin_hood { | |
robin_hood() : data_(capacity, EMPTY) {} | |
void add(uint32_t x) { | |
assert(x != EMPTY && x != TOMBSTONE); // :( | |
auto index = hash(x); | |
size_t curr_probe = 0; | |
while (data_[index] != 0 && data_[index] != x) { | |
auto existing_probe = probe_dist(index); | |
if (existing_probe < curr_probe) { | |
std::swap(x, data_[index]); | |
curr_probe = existing_probe; | |
} | |
index = (index + 1) % capacity; | |
++curr_probe; | |
} | |
data_[index] = x; | |
} | |
bool contains(uint32_t x) { | |
assert(x != EMPTY && x != TOMBSTONE); // :( | |
auto index = hash(x); | |
size_t curr_probe = 0; | |
while (data_[index] != 0 && data_[index] != x) { | |
auto existing_probe = probe_dist(index); | |
if (existing_probe < curr_probe) { | |
return false; // better than plain linear probing | |
} | |
index = (index + 1) % capacity; | |
++curr_probe; | |
} | |
return data_[index] == x; | |
} | |
private: | |
const uint32_t EMPTY = 0; | |
const uint32_t TOMBSTONE = 1; | |
std::vector<uint32_t> data_; | |
size_t hash(uint32_t x) { return x % capacity; } | |
size_t probe_dist(size_t index) { | |
auto ideal_index = hash(data_[index]); | |
return (index + capacity - ideal_index) % capacity; | |
} | |
}; | |
struct cuckoo { | |
const uint32_t EMPTY = 0; | |
void add(uint32_t x) { | |
for (;;) { | |
assert(x != EMPTY); | |
auto index = hash1(x); | |
std::swap(x, data1_[index]); | |
if (x == EMPTY) | |
return; | |
index = hash2(x); | |
std::swap(x, data2_[index]); | |
if (x == EMPTY) | |
return; | |
} | |
} | |
bool contains(uint32_t x) { | |
assert(x != EMPTY); | |
return data1_[hash1(x)] == x || data2_[hash2(x)] == x; | |
} | |
cuckoo() : data1_(capacity, EMPTY), data2_(capacity, EMPTY) {} | |
private: | |
std::vector<uint32_t> data1_; | |
std::vector<uint32_t> data2_; | |
size_t hash1(uint32_t x) { return x % capacity; } | |
size_t hash2(uint32_t x) { return (x * 15485863) % capacity; } | |
}; | |
// use std::chrono | |
clock_t tick_begin; | |
void tick() { tick_begin = clock(); } | |
double tack() { return (clock() - tick_begin); } | |
std::vector<uint32_t> random_numbers() { | |
// use proper C++ random numbers! | |
std::vector<uint32_t> result; | |
for (size_t i = 0; i < capacity * load_factor; i++) { | |
result.push_back(rand()); | |
} | |
return result; | |
} | |
template <typename T> | |
void bench(std::vector<uint32_t> const &to_add, | |
std::vector<uint32_t> const &to_lookup) { | |
srand(time(nullptr)); | |
T map; | |
tick(); | |
for (auto x : to_add) { | |
map.add(x); | |
} | |
std::cout << "insertion " << tack() << std::endl; | |
tick(); | |
for (auto x : to_add) { | |
if (!map.contains(x)) { | |
std::cout << "oups :(" << std::endl; | |
} | |
} | |
std::cout << "successful lookup " << tack() << std::endl; | |
tick(); | |
uint32_t num_failures = 0; | |
for (auto x : to_lookup) { | |
if (!map.contains(x)) { | |
num_failures += 1; | |
} | |
} | |
std::cout << "failed lookup " << tack() << std::endl; | |
std::cout << "failures " << num_failures << std::endl; | |
} | |
int main() { | |
auto to_add = random_numbers(); | |
auto to_lookup = random_numbers(); | |
std::cout << "std::unordered_set" << std::endl; | |
bench<std_us>(to_add, to_lookup); | |
std::cout << "\nchaining" << std::endl; | |
bench<chaining>(to_add, to_lookup); | |
std::cout << "\nchaining two hashes" << std::endl; | |
bench<chaining2h>(to_add, to_lookup); | |
std::cout << "\nlinear probing" << std::endl; | |
bench<linear_probing>(to_add, to_lookup); | |
std::cout << "\nrobin hood" << std::endl; | |
bench<robin_hood>(to_add, to_lookup); | |
// std::cout << "\ncuckoo" << std::endl; | |
// bench<cuckoo>(to_add, to_lookup); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment