Created
December 15, 2023 01:34
-
-
Save delfigamer/e46d53cc42c99bcf7d3fe329f05feb27 to your computer and use it in GitHub Desktop.
An implementation of an insertion-ordered stable hash map
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Toggle the `if constexpr` at the top of main() to switch between stable_map and std::unordered_multimap
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <functional>
#include <memory>
#include <new>
#include <unordered_map>
#include <utility>
struct random_t { | |
uint64_t state = 0x4d595df4d0f33173u; | |
static constexpr uint64_t multiplier = 6364136223846793005u; | |
static constexpr uint64_t increment = 1442695040888963407u; | |
static uint32_t rotr32(uint32_t x, unsigned r) { | |
return x >> r | x << ((32 - r) & 31); | |
} | |
void uniform(uint32_t& r) { | |
uint64_t x = state; | |
unsigned count = (unsigned)(x >> 59); | |
state = x * multiplier + increment; | |
x ^= x >> 18; | |
r = rotr32((uint32_t)(x >> 27), count); | |
} | |
}; | |
template< | |
typename Key, | |
typename Value, | |
typename Hash = std::hash<Key>, | |
typename KeyEqual = std::equal_to<Key> | |
> | |
class stable_map: Hash, KeyEqual { | |
public: | |
static constexpr size_t index_none = size_t(-1); | |
private: | |
static constexpr size_t min_capacity = 16; | |
static constexpr size_t max_tombstone_value = 255; | |
struct info_entry { | |
size_t hash; | |
size_t next_index; | |
}; | |
struct memory_ptrs { | |
uint8_t* tombstones; | |
Key* keys; | |
Value* values; | |
size_t* hash_map; | |
info_entry* info_entries; | |
memory_ptrs() { | |
memset(this, 0, sizeof(memory_ptrs)); | |
} | |
~memory_ptrs() { | |
free(tombstones); | |
free(keys); | |
free(values); | |
free(hash_map); | |
free(info_entries); | |
} | |
memory_ptrs& operator=(memory_ptrs const&) = delete; | |
void reset(memory_ptrs&& other = memory_ptrs()) { | |
free(tombstones); | |
free(keys); | |
free(values); | |
free(hash_map); | |
free(info_entries); | |
tombstones = other.tombstones; | |
keys = other.keys; | |
values = other.values; | |
hash_map = other.hash_map; | |
info_entries = other.info_entries; | |
memset((void*)&other, 0, sizeof(memory_ptrs)); | |
} | |
}; | |
memory_ptrs ptrs; | |
size_t capacity = 0; // always a power of 2 | |
size_t live_entry_count = 0; | |
size_t total_entry_count = 0; | |
// Unline the stable_map::iterator::operator++, this function does not update the tombstone array | |
template<typename Fn> | |
void traverse_live_entries_fast(Fn fn) { | |
size_t index = 0; | |
while (index < total_entry_count) { | |
if (ptrs.tombstones[index] == 0) { | |
fn(index); | |
index += 1; | |
} else { | |
index += ptrs.tombstones[index]; | |
} | |
} | |
} | |
public: | |
stable_map() { | |
} | |
~stable_map() { | |
traverse_live_entries_fast([&](size_t index) { | |
ptrs.keys[index].~Key(); | |
ptrs.values[index].~Value(); | |
}); | |
} | |
void rebuild(size_t new_capacity) { | |
assert((new_capacity & (new_capacity - 1)) == 0); | |
assert(new_capacity >= live_entry_count); | |
if (new_capacity == 0) { | |
ptrs.reset(); | |
capacity = live_entry_count = total_entry_count = 0; | |
return; | |
} | |
memory_ptrs new_ptrs; | |
new_ptrs.tombstones = (uint8_t*)malloc(sizeof(uint8_t) * new_capacity); | |
new_ptrs.keys = (Key*)malloc(sizeof(Key) * new_capacity); | |
new_ptrs.values = (Value*)malloc(sizeof(Value) * new_capacity); | |
new_ptrs.hash_map = (size_t*)malloc(sizeof(size_t) * new_capacity); | |
new_ptrs.info_entries = (info_entry*)malloc(sizeof(info_entry) * new_capacity); | |
memset(new_ptrs.hash_map, -1, sizeof(size_t) * new_capacity); | |
size_t new_mask = new_capacity - 1; | |
size_t t = 0; | |
traverse_live_entries_fast([&](size_t i) { | |
new_ptrs.tombstones[t] = 0; | |
new (&new_ptrs.keys[t]) Key(std::move(ptrs.keys[i])); | |
new (&new_ptrs.values[t]) Value(std::move(ptrs.values[i])); | |
ptrs.keys[i].~Key(); | |
ptrs.values[i].~Value(); | |
size_t bucket = ptrs.info_entries[i].hash & new_mask; | |
new_ptrs.info_entries[t].hash = ptrs.info_entries[i].hash; | |
new_ptrs.info_entries[t].next_index = new_ptrs.hash_map[bucket]; | |
new_ptrs.hash_map[bucket] = t; | |
t += 1; | |
}); | |
ptrs.reset(std::move(new_ptrs)); | |
capacity = new_capacity; | |
live_entry_count = total_entry_count = t; | |
} | |
size_t find_in_bucket(Key const& key, size_t key_hash, size_t* index_ptr) { | |
while (true) { | |
size_t index = *index_ptr; | |
if (index == index_none) { | |
return index_none; | |
} | |
info_entry& info = ptrs.info_entries[index]; | |
if (ptrs.tombstones[index] != 0) { | |
*index_ptr = info.next_index; | |
} else { | |
if (key_hash == info.hash && KeyEqual::operator()(key, ptrs.keys[*index_ptr])) { | |
return index; | |
} else { | |
index_ptr = &info.next_index; | |
} | |
} | |
} | |
} | |
size_t find_first_index(Key const& key) { | |
if (live_entry_count == 0) { | |
return index_none; | |
} | |
size_t key_hash = Hash::operator()(key); | |
return find_in_bucket(key, key_hash, &ptrs.hash_map[key_hash & (capacity - 1)]); | |
} | |
size_t find_next_index(size_t index) { | |
Key const& key = ptrs.keys[index]; | |
size_t key_hash = ptrs.info_entries[index].hash; | |
return find_in_bucket(key, key_hash, &ptrs.info_entries[index].next_index); | |
} | |
size_t append(Key key, Value value) { | |
if (total_entry_count >= capacity) { | |
size_t new_capacity = min_capacity; | |
while (new_capacity < 2 * live_entry_count) { | |
new_capacity *= 2; | |
} | |
rebuild(new_capacity); | |
} | |
size_t index = total_entry_count; | |
size_t key_hash = Hash::operator()(key); | |
size_t bucket = key_hash & (capacity - 1); | |
ptrs.tombstones[index] = 0; | |
new (&ptrs.keys[index]) Key(std::move(key)); | |
new (&ptrs.values[index]) Value(std::move(value)); | |
ptrs.info_entries[index].hash = key_hash; | |
ptrs.info_entries[index].next_index = ptrs.hash_map[bucket]; | |
ptrs.hash_map[bucket] = index; | |
live_entry_count += 1; | |
total_entry_count += 1; | |
return index; | |
} | |
void erase_at(size_t index) { | |
if (index == index_none) { | |
return; | |
} | |
assert(ptrs.tombstones[index] == 0); | |
ptrs.tombstones[index] = 1; | |
ptrs.keys[index].~Key(); | |
ptrs.values[index].~Value(); | |
live_entry_count -= 1; | |
if (capacity > min_capacity && live_entry_count * 2 < total_entry_count) { | |
size_t new_capacity = min_capacity; | |
while (new_capacity < live_entry_count) { | |
new_capacity *= 2; | |
} | |
rebuild(new_capacity); | |
} | |
} | |
void erase_all(Key const& key) { | |
size_t index = find_first_index(key); | |
while (index != index_none) { | |
size_t next_index = find_next_index(index); | |
erase_at(index); | |
index = next_index; | |
} | |
} | |
template<typename Fn> | |
void debug_print(Fn elem_fn) { | |
printf("capacity: %llu\n", (long long)capacity); | |
printf("live_entry_count: %llu\n", (long long)live_entry_count); | |
printf("total_entry_count: %llu\n", (long long)total_entry_count); | |
printf("entries:\n"); | |
for (size_t i = 0; i < total_entry_count; ++i) { | |
if (ptrs.tombstones[i] == 0) { | |
printf("%llu:\n", (long long)i); | |
elem_fn(ptrs.keys[i], ptrs.values[i]); | |
size_t key_hash = Hash::operator()(ptrs.keys[i]); | |
printf(" stored hash: %#18llx\n", (long long)ptrs.info_entries[i].hash); | |
if (key_hash != ptrs.info_entries[i].hash) { | |
printf("!!!!actual hash: %#18llx\n", (long long)key_hash); | |
} | |
printf(" expected bucket: %llu\n", (long long)(key_hash & (capacity - 1))); | |
} else { | |
printf("%llu: tombstone %i\n", (long long)i, ptrs.tombstones[i]); | |
} | |
printf(" next in bucket: %lli\n", (long long)ptrs.info_entries[i].next_index); | |
} | |
printf("hash map:\n"); | |
for (size_t i = 0; i < capacity; ++i) { | |
printf("%llu:", (long long)i); | |
size_t index = ptrs.hash_map[i]; | |
while (index != index_none) { | |
printf(" %llu", (long long)index); | |
index = ptrs.info_entries[index].next_index; | |
} | |
printf("\n"); | |
} | |
} | |
Key const& key_at(size_t index) const { | |
return ptrs.keys[index]; | |
} | |
Value& value_at(size_t index) { | |
return ptrs.values[index]; | |
} | |
size_t first_valid_index_at(size_t initial_index) { | |
auto tombstone_value_at = [&](size_t i) -> size_t { | |
if (i < total_entry_count) { | |
return ptrs.tombstones[i]; | |
} else { | |
return 0; | |
} | |
}; | |
size_t tombstone_at_initial_index = tombstone_value_at(initial_index); | |
if (tombstone_at_initial_index == 0) { | |
return initial_index; | |
} | |
size_t next_index = initial_index + tombstone_at_initial_index; | |
size_t tombstone_at_next_index = tombstone_value_at(next_index); | |
if (tombstone_at_next_index == 0) { | |
return next_index; | |
} | |
size_t target_index = next_index + tombstone_at_next_index; | |
size_t tombstone_at_target_index = tombstone_value_at(target_index); | |
while (true) { | |
size_t target_delta = target_index - initial_index; | |
if (target_delta > max_tombstone_value) { | |
ptrs.tombstones[initial_index] = max_tombstone_value; | |
initial_index += max_tombstone_value; | |
} | |
if (tombstone_at_target_index == 0) { | |
break; | |
} | |
target_index += tombstone_at_target_index; | |
tombstone_at_target_index = tombstone_value_at(target_index); | |
} | |
assert(target_index - initial_index <= max_tombstone_value); | |
ptrs.tombstones[initial_index] = target_index - initial_index; | |
return target_index; | |
} | |
struct iterator { | |
stable_map* target; | |
size_t index; | |
Key const& key() const { return target->ptrs.keys[index]; } | |
Value& value() const { return target->ptrs.values[index]; } | |
bool operator==(iterator const& other) const { return index == other.index; } | |
bool operator!=(iterator const& other) const { return index != other.index; } | |
iterator const& operator*() const { | |
return *this; | |
} | |
iterator const* operator->() const { | |
return this; | |
} | |
iterator& operator++() { | |
index = target->first_valid_index_at(index + 1); | |
return *this; | |
} | |
}; | |
iterator begin() { return iterator{this, first_valid_index_at(0)}; } | |
iterator end() { return iterator{this, total_entry_count}; } | |
struct lookup_iterator { | |
stable_map* target; | |
size_t index; | |
Key const& key() const { return target->ptrs.keys[index]; } | |
Value& value() const { return target->ptrs.values[index]; } | |
bool operator==(std::nullptr_t) const { return index == index_none; } | |
bool operator!=(std::nullptr_t) const { return index != index_none; } | |
lookup_iterator const& operator*() const { | |
return *this; | |
} | |
lookup_iterator const* operator->() const { | |
return this; | |
} | |
lookup_iterator& operator++() { | |
index = target->find_next_index(index); | |
return *this; | |
} | |
}; | |
std::pair<lookup_iterator, nullptr_t> equal_range(Key const& key) { | |
return std::make_pair(lookup_iterator{this, find_first_index(key)}, nullptr); | |
} | |
}; | |
int main() { | |
random_t random; | |
size_t final_total = 0; | |
size_t total_step_count = 0x10000000; | |
size_t inner_step_count = 0x10; | |
printf("%llu\n", (long long)inner_step_count); | |
while (total_step_count > 0) { | |
if constexpr (true) { // `true` for stable_map, `false` for std::unordered_map | |
stable_map<uint32_t, uint32_t> sm; | |
for (size_t i = 0; i < inner_step_count; ++i) { | |
uint32_t c; | |
random.uniform(c); | |
switch (c % 4) { | |
case 0: { | |
uint32_t x; | |
random.uniform(x); | |
sm.append(x % inner_step_count, x); | |
break; | |
} | |
case 1: { | |
uint32_t x; | |
random.uniform(x); | |
sm.erase_all(x % inner_step_count); | |
break; | |
} | |
case 2: | |
case 3: { | |
uint32_t x; | |
random.uniform(x); | |
for (auto [it, end] = sm.equal_range(x % inner_step_count); it != end; ++it) { | |
it->value() += x; | |
} | |
break; | |
} | |
} | |
} | |
for (auto& e : sm) { | |
final_total += (e.key() << 16) + e.value(); | |
} | |
} else { | |
std::unordered_multimap<uint32_t, uint32_t> um; | |
for (size_t i = 0; i < inner_step_count; ++i) { | |
uint32_t c; | |
random.uniform(c); | |
switch (c % 4) { | |
case 0: { | |
uint32_t x; | |
random.uniform(x); | |
um.insert(std::make_pair(x % inner_step_count, x)); | |
break; | |
} | |
case 1: { | |
uint32_t x; | |
random.uniform(x); | |
um.erase(x % inner_step_count); | |
break; | |
} | |
case 2: | |
case 3: { | |
uint32_t x; | |
random.uniform(x); | |
for (auto [it, end] = um.equal_range(x % inner_step_count); it != end; ++it) { | |
it->second += x; | |
} | |
break; | |
} | |
} | |
} | |
for (auto& kv : um) { | |
final_total += (kv.first << 16) + kv.second; | |
} | |
} | |
total_step_count -= inner_step_count; | |
} | |
printf("%llu\n", (long long)final_total); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment