Skip to content

Instantly share code, notes, and snippets.

@ayende
Created November 14, 2019 19:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayende/5aaa6500645ad25228ab43c96f8f849e to your computer and use it in GitHub Desktop.
Save ayende/5aaa6500645ad25228ab43c96f8f849e to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>
#include <stdbool.h>
typedef struct datum {
uint64_t key;
uint64_t value;
} datum_t;
typedef struct hash_bucket {
uint16_t size;
uint16_t capacity;
uint8_t depth;
datum_t values[0];
} hash_bucket_t;
typedef struct hash_directory {
uint32_t size;
uint32_t allocated_pages;
uint8_t depth;
hash_bucket_t* buckets[0];
} hash_directory_t;
typedef struct hash_ctx {
size_t page_size;
void* (*allocate_page)(uint32_t n);
void (*release_page)(void* p);
hash_directory_t* dir;
} hash_ctx_t;
void print_bits(uint64_t v, int depth)
{
unsigned char* b = (unsigned char*)&v;
unsigned char byte;
int i, j;
int bits = sizeof(uint64_t) * 8;
bool foundNonZero = false;
for (i = sizeof(uint64_t) - 1; i >= 0; i--)
{
for (j = 7; j >= 0; j--)
{
bits--;
byte = (b[i] >> j) & 1;
if (byte) {
foundNonZero = true;
}
if (!foundNonZero && bits > depth && bits > 8)
continue;
printf("%u", byte);
if(bits == depth)
printf(" ");
}
}
}
void print_bucket(hash_bucket_t* b, uint32_t i) {
printf("\tbucket_%p [label=\"Depth: %i, Size: %i\\lCapacity: %i\\l--------\\l", b, b->depth, b->size, b->capacity);
for (size_t i = 0; i < b->size; i++)
{
print_bits(b->values[i].key, b->depth);
printf(" \\| %4llu = %4llu\\l", b->values[i].key, b->values[i].value);
}
printf("\"]\n");
}
void print_dir_graphviz(hash_ctx_t* ctx) {
printf("digraph hash {\n\tnode[shape = record ]; \n");
printf("\ttable [label=\"Depth: %i, Size: %i\\lPages: %i, Page Size: %i\\l\"]\n", ctx->dir->depth, ctx->dir->size, ctx->dir->allocated_pages, ctx->page_size);
printf("\tbuckets [label=\"");
for (size_t i = 0; i < ctx->dir->size; i++)
{
if (i != 0)
printf("|");
printf("<bucket_%i> %i - %p ", i, i, ctx->dir->buckets[i]);
}
printf("\"]\n");
for (size_t i = 0; i < ctx->dir->size; i++)
{
print_bucket(ctx->dir->buckets[i], i);
}
for (size_t i = 0; i < ctx->dir->size; i++) {
printf("\tbuckets:bucket_%i -> bucket_%p;\n", i, ctx->dir->buckets[i]);
}
printf("\ttable->buckets;\n}\n");
}
uint32_t hash_table_bucket_number(hash_ctx_t* ctx, uint64_t h) {
return h & (((uint64_t)1 << ctx->dir->depth) - 1);
}
size_t hash_table_get_directory_capacity(hash_ctx_t* ctx) {
return ((ctx->dir->allocated_pages * ctx->page_size) - sizeof(hash_directory_t)) / sizeof(hash_bucket_t);
}
hash_bucket_t* create_hash_bucket(hash_ctx_t* ctx) {
hash_bucket_t* b = ctx->allocate_page(1);
if (b == NULL)
return NULL;
b->size = 0;
b->capacity = (uint16_t)((ctx->page_size - sizeof(hash_bucket_t)) / sizeof(datum_t));
b->depth = ctx->dir->depth;
return b;
}
bool hash_table_get(hash_ctx_t* ctx, uint64_t key, uint64_t* value) {
uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
for (size_t i = 0; i < b->size; i++)
{
if (b->values[i].key == key) {
*value = b->values[i].value;
return true;
}
}
return false;
}
bool hash_table_put(hash_ctx_t* ctx, uint64_t key, uint64_t value) {
uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
for (size_t i = 0; i < b->size; i++)
{
if (b->values[i].key == key) {
b->values[i].value = value;
return true;
}
}
if (b->capacity > b->size) {
b->values[b->size++] = (datum_t){ key, value };
return true;
}
if (ctx->dir->depth == b->depth) {
hash_directory_t* new_dir;
if (ctx->dir->size * 2 > hash_table_get_directory_capacity(ctx)) {
// have to increase the actual allocated memory here
new_dir = ctx->allocate_page(ctx->dir->allocated_pages * 2);
if (new_dir == NULL)
return false;
size_t dir_size = ctx->dir->allocated_pages * ctx->page_size;
memcpy(new_dir, ctx->dir, dir_size);
new_dir->allocated_pages *= 2;
}
else {
new_dir = ctx->dir; // there is enough space to increase size without allocations
}
size_t buckets_size = ctx->dir->size * sizeof(hash_bucket_t*);
memcpy((char*)new_dir + sizeof(hash_directory_t) + buckets_size, (char*)ctx->dir + sizeof(hash_directory_t), buckets_size);
new_dir->depth++;
new_dir->size = ctx->dir->size * 2;
if (new_dir != ctx->dir) {
ctx->release_page(ctx->dir);
ctx->dir = new_dir;
}
}
hash_bucket_t* n = create_hash_bucket(ctx);
if (!n)
return false;
hash_bucket_t* tmp = ctx->allocate_page(1);
if (!tmp) {
ctx->release_page(n);
ctx->release_page(tmp);
// no need to release the ctx->dir we allocated, was wired
// properly to the table and will be freed with the whole table
return false;
}
memcpy(tmp, b, ctx->page_size);
n->depth = b->depth = tmp->depth + 1;
b->size = 0;
uint32_t bit = 1 << tmp->depth;
for (size_t i = 0; i < tmp->size; i++)
{
hash_bucket_t* p = tmp->values[i].key & bit ? n : b;
p->values[p->size++] = tmp->values[i];
}
// now can add the new value in...
{
hash_bucket_t* p = key & bit ? n : b;
p->values[p->size++] = (datum_t){ key, value };
}
ctx->release_page(tmp);
for (size_t i = key & (bit - 1); i < ctx->dir->size; i += bit)
{
ctx->dir->buckets[i] = i & bit ? n : b;
}
return true;
}
bool hash_table_init(hash_ctx_t* ctx) {
ctx->dir = ctx->allocate_page(1);
if (ctx->dir == NULL)
return false;
ctx->dir->size = 2;
ctx->dir->allocated_pages = 1;
ctx->dir->depth = 1;
ctx->dir->buckets[0] = create_hash_bucket(ctx);
ctx->dir->buckets[1] = create_hash_bucket(ctx);
if (!ctx->dir->buckets[0] || !ctx->dir->buckets[1]) {
ctx->release_page(ctx->dir->buckets[0]);
ctx->release_page(ctx->dir->buckets[1]);
ctx->release_page(ctx->dir);
return false;
}
return true;
}
void* allocate_4k_page(uint32_t n) {
return _aligned_malloc(4096 * n, 4096);
}
void release_4k_page(void* p) {
if (!p)
return;
_aligned_free(p);
}
int main()
{
const uint32_t size = 31;
struct hash_ctx ctx = { 256, allocate_4k_page, release_4k_page };
if (!hash_table_init(&ctx)) {
printf("Failed to init\n");
return -1;
}
for (size_t i = 0; i < size; i++)
{
if (!hash_table_put(&ctx, i, 100ull + i)) {
printf("Failed to put\n");
return -1;
}
for (size_t x = 0; x < i; x++)
{
uint64_t val;
if (!hash_table_get(&ctx, x, &val)) {
printf("Failed to get %i\n", x);
print_dir_graphviz(&ctx);
return -1;
}
}
}
print_dir_graphviz(&ctx);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment