Skip to content

Instantly share code, notes, and snippets.

@assyrianic
Created July 14, 2021 03:00
Show Gist options
  • Save assyrianic/e22e29c62d2fac7a89fcacc94fb48af7 to your computer and use it in GitHub Desktop.
Save assyrianic/e22e29c62d2fac7a89fcacc94fb48af7 to your computer and use it in GitHub Desktop.
cache friendly, order-preserving hash table.
/**
* ordered map for SourcePawn.
* Author: Kevin Yonan, (C) 2021.
* License: MIT
*/
#ifndef ORDMAP_INCLUDED
# define ORDMAP_INCLUDED
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <string>
#include <random>
#include <memory>
#include <variant>
static size_t bool_hash(const bool key, const size_t seed=0) {
size_t h = seed + 1;
if( key ) {
h = 1 + (h << 6) + (h << 16) - h;
} else {
h = (h << 6) + (h << 16) - h;
}
return h;
}
static size_t int_hash(const size_t key, const size_t seed=0) {
size_t h = seed;
for( size_t n=0; n < sizeof key * 8; n += 8 ) {
h = ((key >> n) & 0xFF) + (h << 6) + (h << 16) - h;
}
return h;
}
static size_t float_hash(const float key, const size_t seed=0) {
size_t n = 0; memcpy(&n, &key, sizeof key);
return int_hash(n, seed);
}
template< typename T >
static size_t array_hash(T key, const size_t len, const size_t seed=0) {
size_t h = seed;
for( size_t i=0; i < len; i++ ) {
h = ( size_t )(key[i]) + (h << 6) + (h << 16) - h;
}
return h;
}
typedef std::variant<
/// int types.
bool, size_t, int, intptr_t,
/// self explanatory.
float,
/// string types.
const char*,
const uint8_t*,
const bool*,
const int*,
const void*
> OrdMapHashable;
static size_t tg_hash(const OrdMapHashable &key, const size_t len, const size_t seed=0) {
switch( key.index() ) {
case 0: return bool_hash(std::get< bool >(key), seed);
case 1: return int_hash(std::get< size_t >(key), seed);
case 2: return int_hash(std::get< int >(key), seed);
case 3: return int_hash(std::get< intptr_t >(key), seed);
case 4: return float_hash(std::get< float >(key), seed);
case 5: return array_hash(std::get< const char* >(key), len, seed);
case 6: return array_hash(std::get< const uint8_t* >(key), len, seed);
case 7: return array_hash(std::get< const bool* >(key), len, seed);
case 8: return array_hash(std::get< const int* >(key), len, seed);
default: {
size_t h = seed;
const uint8_t *k = reinterpret_cast< const uint8_t* >(std::get< const void* >(key));
for( size_t i=0; i < len; i++ ) {
h = ( size_t )(k[i]) + (h << 6) + (h << 16) - h;
}
return h;
}
}
}
template< typename T >
static std::unique_ptr< T[] > resize_unique_ptr(std::unique_ptr< T[] > &p, const size_t new_size, const size_t old_size) {
std::unique_ptr< T[] > new_ptr = std::make_unique< T[] >(new_size);
for( size_t i=0; i<old_size; i++ ) {
new_ptr[i] = std::move(p[i]);
}
return new_ptr;
}
struct PawnArray {
std::unique_ptr< cell_t[] > t;
size_t l;
PawnArray(const cell_t *arr, const size_t len) {
t = std::make_unique< cell_t[] >(len);
l = len;
for( size_t i=0; i<len; i++ ) {
t[i] = arr[i];
}
}
};
struct PawnStr {
std::unique_ptr< char[] > t;
size_t l;
PawnStr(const char *str, size_t len=0) {
if( len==0 ) {
len = strlen(str);
}
t = std::make_unique< char[] >(len + 1);
l = len + 1;
strcpy(t.get(), str);
}
};
struct PawnFunc {
cell_t fnid;
PawnFunc(cell_t n) { fnid = n; }
};
typedef std::vector< size_t* > OrdMapBucket;
template< typename Entry >
struct OrdMap {
std::unique_ptr< OrdMapBucket[] > buckets;
std::unique_ptr< Entry[] > datum;
std::unique_ptr< OrdMapHashable[] > keys;
std::unique_ptr< size_t[] > hashes, keylens;
size_t cap, len, seed;
OrdMap(const size_t init_size = 4) {
keys = std::make_unique< OrdMapHashable[] >(init_size);
datum = std::make_unique< Entry[] >(init_size);
keylens = std::make_unique< size_t[] >(init_size);
hashes = std::make_unique< size_t[] >(init_size);
buckets = std::make_unique< OrdMapBucket[] >(init_size);
cap = init_size;
len = 0;
std::random_device rd; std::mt19937 gen( rd() );
std::uniform_int_distribution< size_t > distrib(0, SIZE_MAX);
seed = distrib(gen);
}
~OrdMap() {
for( size_t i=0; i<cap; i++ ) {
buckets[i].clear();
}
len = seed = cap = 0;
}
bool HasKey(const OrdMapHashable &key, const size_t keylen) const {
return GetKeyIdx(key, keylen) < len;
}
size_t KeySet(const OrdMapHashable &key, const size_t keylen, Entry val) {
size_t index = GetKeyIdx(key, keylen);
if( index < len ) {
datum[index] = std::move(val);
return index;
} else if( len >= cap && !_rehash(cap << 1) ) {
return SIZE_MAX;
}
index = len;
keys[index] = key;
datum[index] = std::move(val);
hashes[index] = tg_hash(key, keylen, seed);
_insert_entry(index);
keylens[index] = keylen;
len++;
return index;
}
size_t GetKeyIdx(OrdMapHashable key, const size_t keylen) const {
const size_t hash = tg_hash(key, keylen, seed);
const size_t index = hash & (cap - 1);
for( auto n : buckets[index] ) {
const uintptr_t arr_index = (( uintptr_t )(n) - ( uintptr_t )(hashes.get())) / sizeof *n;
if( *n==hash && keylens[arr_index]==keylen && keys[arr_index]==key )
return arr_index;
}
return SIZE_MAX;
}
Entry &KeyGet(const OrdMapHashable &key, const size_t keylen) const {
const size_t entry = GetKeyIdx(key, keylen);
return IdxGet(entry);
}
Entry &IdxGet(const size_t index) const {
return( index >= len ) ? datum[cap-1] : datum[index];
}
bool IdxSet(const size_t index, Entry val) {
if( index >= len ) {
return false;
}
datum[index] = std::move(val);
return true;
}
bool KeyRm(const OrdMapHashable &key, const size_t keylen) {
return IdxRm(GetKeyIdx(key, keylen));
}
bool IdxRm(const size_t n) {
if( n >= len )
return false;
const size_t index = hashes[n] & (cap - 1);
std::vector< size_t* > &bucket = buckets[index];
const size_t entry = OrdMap::_get_vec_idx(bucket, &hashes[n]);
if( entry==SIZE_MAX )
return false;
bucket.erase(bucket.begin() + entry);
hashes[n] = keylens[n] = 0;
OrdMap::_array_shift_up< OrdMapHashable >(keys.get(), len, n);
OrdMap::_array_shift_up< Entry >(datum.get(), len, n);
OrdMap::_array_shift_up< size_t >(hashes.get(), len, n);
OrdMap::_array_shift_up< size_t >(keylens.get(), len, n);
len--;
return true;
}
void Clear() {
for( size_t i=0; i<len; i++ ) {
keylens[i] = 0;
hashes[i] = 0;
datum[i] = Entry();
}
for( size_t i=0; i<cap; i++ ) {
buckets[i].clear();
}
len = 0;
}
Entry& operator[](const size_t index) {
return( index >= len )? datum[cap-1] : datum[index];
}
const Entry& operator[](const size_t index) const {
return( index >= len )? datum[cap-1] : datum[index];
}
Entry& operator[](const char *key) {
const size_t len = strlen(key);
size_t index = GetKeyIdx(key, len + 1);
if( index==SIZE_MAX ) {
index = KeySet(key, len + 1, Entry());
}
return datum[index];
}
const Entry& operator[](const char *key) const {
const size_t len = strlen(key);
size_t index = GetKeyIdx(key, len + 1);
if( index==SIZE_MAX ) {
return datum[cap - 1]; /// valid reference but invalid data!
}
return datum[index];
}
private:
void _insert_entry(const size_t n) {
buckets[hashes[n] & (cap - 1)].push_back(&hashes[n]);
}
void _resize_vecs(const size_t new_size) {
keys = resize_unique_ptr< OrdMapHashable >(keys, new_size, cap);
keylens = resize_unique_ptr< size_t >(keylens, new_size, cap);
hashes = resize_unique_ptr< size_t >(hashes, new_size, cap);
datum = resize_unique_ptr< Entry >(datum, new_size, cap);
}
bool _rehash(const size_t new_size) {
const size_t old_cap = cap;
std::vector< size_t* > *curr = buckets.release();
buckets = std::make_unique< OrdMapBucket[] >(new_size);
_resize_vecs(new_size);
cap = new_size;
for( size_t i=0; i<old_cap; i++ ) {
curr[i].clear();
}
delete[] curr; curr = nullptr;
for( size_t i=0; i<len; i++ ) {
_insert_entry(i);
}
return true;
}
template< typename T >
static void _array_shift_up(T *const buf, size_t len, const size_t index) {
if( index >= len ) {
return;
}
for( size_t j=index, i=index+1; i<len; i++, j++ ) {
buf[j] = std::move(buf[i]);
}
len--;
buf[len] = T();
}
static size_t _get_vec_idx(std::vector< size_t* > &bucket, const size_t *const p) {
for( size_t i=0; i<bucket.size(); i++ ) {
if( bucket[i]==p ) {
return i;
}
}
return SIZE_MAX;
}
};
#endif /** ORDMAP_INCLUDED */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment