Skip to content

Instantly share code, notes, and snippets.

@delfigamer
Created December 15, 2023 01:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save delfigamer/e46d53cc42c99bcf7d3fe329f05feb27 to your computer and use it in GitHub Desktop.
Save delfigamer/e46d53cc42c99bcf7d3fe329f05feb27 to your computer and use it in GitHub Desktop.
An implementation of an insertion-ordered stable hash map
// Toggle the `if constexpr` at line 382 to switch between stable_map and std::unordered_multimap
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <functional>
#include <memory>
#include <unordered_map>
struct random_t {
uint64_t state = 0x4d595df4d0f33173u;
static constexpr uint64_t multiplier = 6364136223846793005u;
static constexpr uint64_t increment = 1442695040888963407u;
static uint32_t rotr32(uint32_t x, unsigned r) {
return x >> r | x << ((32 - r) & 31);
}
void uniform(uint32_t& r) {
uint64_t x = state;
unsigned count = (unsigned)(x >> 59);
state = x * multiplier + increment;
x ^= x >> 18;
r = rotr32((uint32_t)(x >> 27), count);
}
};
template<
typename Key,
typename Value,
typename Hash = std::hash<Key>,
typename KeyEqual = std::equal_to<Key>
>
class stable_map: Hash, KeyEqual {
public:
static constexpr size_t index_none = size_t(-1);
private:
static constexpr size_t min_capacity = 16;
static constexpr size_t max_tombstone_value = 255;
struct info_entry {
size_t hash;
size_t next_index;
};
struct memory_ptrs {
uint8_t* tombstones;
Key* keys;
Value* values;
size_t* hash_map;
info_entry* info_entries;
memory_ptrs() {
memset(this, 0, sizeof(memory_ptrs));
}
~memory_ptrs() {
free(tombstones);
free(keys);
free(values);
free(hash_map);
free(info_entries);
}
memory_ptrs& operator=(memory_ptrs const&) = delete;
void reset(memory_ptrs&& other = memory_ptrs()) {
free(tombstones);
free(keys);
free(values);
free(hash_map);
free(info_entries);
tombstones = other.tombstones;
keys = other.keys;
values = other.values;
hash_map = other.hash_map;
info_entries = other.info_entries;
memset((void*)&other, 0, sizeof(memory_ptrs));
}
};
memory_ptrs ptrs;
size_t capacity = 0; // always a power of 2
size_t live_entry_count = 0;
size_t total_entry_count = 0;
// Unline the stable_map::iterator::operator++, this function does not update the tombstone array
template<typename Fn>
void traverse_live_entries_fast(Fn fn) {
size_t index = 0;
while (index < total_entry_count) {
if (ptrs.tombstones[index] == 0) {
fn(index);
index += 1;
} else {
index += ptrs.tombstones[index];
}
}
}
public:
stable_map() {
}
~stable_map() {
traverse_live_entries_fast([&](size_t index) {
ptrs.keys[index].~Key();
ptrs.values[index].~Value();
});
}
void rebuild(size_t new_capacity) {
assert((new_capacity & (new_capacity - 1)) == 0);
assert(new_capacity >= live_entry_count);
if (new_capacity == 0) {
ptrs.reset();
capacity = live_entry_count = total_entry_count = 0;
return;
}
memory_ptrs new_ptrs;
new_ptrs.tombstones = (uint8_t*)malloc(sizeof(uint8_t) * new_capacity);
new_ptrs.keys = (Key*)malloc(sizeof(Key) * new_capacity);
new_ptrs.values = (Value*)malloc(sizeof(Value) * new_capacity);
new_ptrs.hash_map = (size_t*)malloc(sizeof(size_t) * new_capacity);
new_ptrs.info_entries = (info_entry*)malloc(sizeof(info_entry) * new_capacity);
memset(new_ptrs.hash_map, -1, sizeof(size_t) * new_capacity);
size_t new_mask = new_capacity - 1;
size_t t = 0;
traverse_live_entries_fast([&](size_t i) {
new_ptrs.tombstones[t] = 0;
new (&new_ptrs.keys[t]) Key(std::move(ptrs.keys[i]));
new (&new_ptrs.values[t]) Value(std::move(ptrs.values[i]));
ptrs.keys[i].~Key();
ptrs.values[i].~Value();
size_t bucket = ptrs.info_entries[i].hash & new_mask;
new_ptrs.info_entries[t].hash = ptrs.info_entries[i].hash;
new_ptrs.info_entries[t].next_index = new_ptrs.hash_map[bucket];
new_ptrs.hash_map[bucket] = t;
t += 1;
});
ptrs.reset(std::move(new_ptrs));
capacity = new_capacity;
live_entry_count = total_entry_count = t;
}
size_t find_in_bucket(Key const& key, size_t key_hash, size_t* index_ptr) {
while (true) {
size_t index = *index_ptr;
if (index == index_none) {
return index_none;
}
info_entry& info = ptrs.info_entries[index];
if (ptrs.tombstones[index] != 0) {
*index_ptr = info.next_index;
} else {
if (key_hash == info.hash && KeyEqual::operator()(key, ptrs.keys[*index_ptr])) {
return index;
} else {
index_ptr = &info.next_index;
}
}
}
}
size_t find_first_index(Key const& key) {
if (live_entry_count == 0) {
return index_none;
}
size_t key_hash = Hash::operator()(key);
return find_in_bucket(key, key_hash, &ptrs.hash_map[key_hash & (capacity - 1)]);
}
size_t find_next_index(size_t index) {
Key const& key = ptrs.keys[index];
size_t key_hash = ptrs.info_entries[index].hash;
return find_in_bucket(key, key_hash, &ptrs.info_entries[index].next_index);
}
size_t append(Key key, Value value) {
if (total_entry_count >= capacity) {
size_t new_capacity = min_capacity;
while (new_capacity < 2 * live_entry_count) {
new_capacity *= 2;
}
rebuild(new_capacity);
}
size_t index = total_entry_count;
size_t key_hash = Hash::operator()(key);
size_t bucket = key_hash & (capacity - 1);
ptrs.tombstones[index] = 0;
new (&ptrs.keys[index]) Key(std::move(key));
new (&ptrs.values[index]) Value(std::move(value));
ptrs.info_entries[index].hash = key_hash;
ptrs.info_entries[index].next_index = ptrs.hash_map[bucket];
ptrs.hash_map[bucket] = index;
live_entry_count += 1;
total_entry_count += 1;
return index;
}
void erase_at(size_t index) {
if (index == index_none) {
return;
}
assert(ptrs.tombstones[index] == 0);
ptrs.tombstones[index] = 1;
ptrs.keys[index].~Key();
ptrs.values[index].~Value();
live_entry_count -= 1;
if (capacity > min_capacity && live_entry_count * 2 < total_entry_count) {
size_t new_capacity = min_capacity;
while (new_capacity < live_entry_count) {
new_capacity *= 2;
}
rebuild(new_capacity);
}
}
void erase_all(Key const& key) {
size_t index = find_first_index(key);
while (index != index_none) {
size_t next_index = find_next_index(index);
erase_at(index);
index = next_index;
}
}
template<typename Fn>
void debug_print(Fn elem_fn) {
printf("capacity: %llu\n", (long long)capacity);
printf("live_entry_count: %llu\n", (long long)live_entry_count);
printf("total_entry_count: %llu\n", (long long)total_entry_count);
printf("entries:\n");
for (size_t i = 0; i < total_entry_count; ++i) {
if (ptrs.tombstones[i] == 0) {
printf("%llu:\n", (long long)i);
elem_fn(ptrs.keys[i], ptrs.values[i]);
size_t key_hash = Hash::operator()(ptrs.keys[i]);
printf(" stored hash: %#18llx\n", (long long)ptrs.info_entries[i].hash);
if (key_hash != ptrs.info_entries[i].hash) {
printf("!!!!actual hash: %#18llx\n", (long long)key_hash);
}
printf(" expected bucket: %llu\n", (long long)(key_hash & (capacity - 1)));
} else {
printf("%llu: tombstone %i\n", (long long)i, ptrs.tombstones[i]);
}
printf(" next in bucket: %lli\n", (long long)ptrs.info_entries[i].next_index);
}
printf("hash map:\n");
for (size_t i = 0; i < capacity; ++i) {
printf("%llu:", (long long)i);
size_t index = ptrs.hash_map[i];
while (index != index_none) {
printf(" %llu", (long long)index);
index = ptrs.info_entries[index].next_index;
}
printf("\n");
}
}
Key const& key_at(size_t index) const {
return ptrs.keys[index];
}
Value& value_at(size_t index) {
return ptrs.values[index];
}
size_t first_valid_index_at(size_t initial_index) {
auto tombstone_value_at = [&](size_t i) -> size_t {
if (i < total_entry_count) {
return ptrs.tombstones[i];
} else {
return 0;
}
};
size_t tombstone_at_initial_index = tombstone_value_at(initial_index);
if (tombstone_at_initial_index == 0) {
return initial_index;
}
size_t next_index = initial_index + tombstone_at_initial_index;
size_t tombstone_at_next_index = tombstone_value_at(next_index);
if (tombstone_at_next_index == 0) {
return next_index;
}
size_t target_index = next_index + tombstone_at_next_index;
size_t tombstone_at_target_index = tombstone_value_at(target_index);
while (true) {
size_t target_delta = target_index - initial_index;
if (target_delta > max_tombstone_value) {
ptrs.tombstones[initial_index] = max_tombstone_value;
initial_index += max_tombstone_value;
}
if (tombstone_at_target_index == 0) {
break;
}
target_index += tombstone_at_target_index;
tombstone_at_target_index = tombstone_value_at(target_index);
}
assert(target_index - initial_index <= max_tombstone_value);
ptrs.tombstones[initial_index] = target_index - initial_index;
return target_index;
}
struct iterator {
stable_map* target;
size_t index;
Key const& key() const { return target->ptrs.keys[index]; }
Value& value() const { return target->ptrs.values[index]; }
bool operator==(iterator const& other) const { return index == other.index; }
bool operator!=(iterator const& other) const { return index != other.index; }
iterator const& operator*() const {
return *this;
}
iterator const* operator->() const {
return this;
}
iterator& operator++() {
index = target->first_valid_index_at(index + 1);
return *this;
}
};
iterator begin() { return iterator{this, first_valid_index_at(0)}; }
iterator end() { return iterator{this, total_entry_count}; }
struct lookup_iterator {
stable_map* target;
size_t index;
Key const& key() const { return target->ptrs.keys[index]; }
Value& value() const { return target->ptrs.values[index]; }
bool operator==(std::nullptr_t) const { return index == index_none; }
bool operator!=(std::nullptr_t) const { return index != index_none; }
lookup_iterator const& operator*() const {
return *this;
}
lookup_iterator const* operator->() const {
return this;
}
lookup_iterator& operator++() {
index = target->find_next_index(index);
return *this;
}
};
std::pair<lookup_iterator, nullptr_t> equal_range(Key const& key) {
return std::make_pair(lookup_iterator{this, find_first_index(key)}, nullptr);
}
};
int main() {
random_t random;
size_t final_total = 0;
size_t total_step_count = 0x10000000;
size_t inner_step_count = 0x10;
printf("%llu\n", (long long)inner_step_count);
while (total_step_count > 0) {
if constexpr (true) { // `true` for stable_map, `false` for std::unordered_map
stable_map<uint32_t, uint32_t> sm;
for (size_t i = 0; i < inner_step_count; ++i) {
uint32_t c;
random.uniform(c);
switch (c % 4) {
case 0: {
uint32_t x;
random.uniform(x);
sm.append(x % inner_step_count, x);
break;
}
case 1: {
uint32_t x;
random.uniform(x);
sm.erase_all(x % inner_step_count);
break;
}
case 2:
case 3: {
uint32_t x;
random.uniform(x);
for (auto [it, end] = sm.equal_range(x % inner_step_count); it != end; ++it) {
it->value() += x;
}
break;
}
}
}
for (auto& e : sm) {
final_total += (e.key() << 16) + e.value();
}
} else {
std::unordered_multimap<uint32_t, uint32_t> um;
for (size_t i = 0; i < inner_step_count; ++i) {
uint32_t c;
random.uniform(c);
switch (c % 4) {
case 0: {
uint32_t x;
random.uniform(x);
um.insert(std::make_pair(x % inner_step_count, x));
break;
}
case 1: {
uint32_t x;
random.uniform(x);
um.erase(x % inner_step_count);
break;
}
case 2:
case 3: {
uint32_t x;
random.uniform(x);
for (auto [it, end] = um.equal_range(x % inner_step_count); it != end; ++it) {
it->second += x;
}
break;
}
}
}
for (auto& kv : um) {
final_total += (kv.first << 16) + kv.second;
}
}
total_step_count -= inner_step_count;
}
printf("%llu\n", (long long)final_total);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment