Created
November 14, 2019 19:09
-
-
Save ayende/5aaa6500645ad25228ab43c96f8f849e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <malloc.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A single key/value entry stored inside a bucket.
typedef struct datum {
	uint64_t key;
	uint64_t value;
} datum_t;
typedef struct hash_bucket { | |
uint16_t size; | |
uint16_t capacity; | |
uint8_t depth; | |
datum_t values[0]; | |
} hash_bucket_t; | |
typedef struct hash_directory { | |
uint32_t size; | |
uint32_t allocated_pages; | |
uint8_t depth; | |
hash_bucket_t* buckets[0]; | |
} hash_directory_t; | |
// Table context: page-allocation callbacks plus the current directory.
typedef struct hash_ctx {
	size_t page_size;                   // logical page size used for buckets and directory growth
	void* (*allocate_page)(uint32_t n); // allocate n contiguous pages; NULL on failure
	void (*release_page)(void* p);      // release an allocation from allocate_page
	hash_directory_t* dir;              // current directory (replaced when the directory doubles)
} hash_ctx_t;
// Print v in binary on stdout, suppressing leading zero bits except
// that the low 8 bits and the low `depth` bits are always shown; a
// space is emitted after the bit at position `depth` to visually
// separate the bits used for bucket selection.
// NOTE(review): the original read the value through a byte pointer and
// so depended on little-endian layout; this shift-based version is
// portable and produces identical output on little-endian hosts.
void print_bits(uint64_t v, int depth)
{
	bool seen_one = false;
	for (int pos = 63; pos >= 0; pos--)
	{
		unsigned bit = (unsigned)((v >> pos) & 1);
		if (bit)
			seen_one = true;
		// Skip leading zeros, but never the low-8 or low-depth bits.
		if (seen_one || pos <= depth || pos <= 8)
			printf("%u", bit);
		if (pos == depth)
			printf(" ");
	}
}
// Emit one bucket as a graphviz record node: header (local depth, size,
// capacity) followed by one line per entry showing the key's bits split
// at the bucket depth, then the key/value pair.
// The directory-index parameter `i` is currently unused (the node is
// keyed by the bucket's address); kept for interface compatibility.
void print_bucket(hash_bucket_t* b, uint32_t i) {
	(void)i;
	printf("\tbucket_%p [label=\"Depth: %i, Size: %i\\lCapacity: %i\\l--------\\l",
		(void*)b, b->depth, b->size, b->capacity);
	// Original shadowed the parameter with a loop variable also named i;
	// renamed, and %llu replaced with PRIu64 to match uint64_t exactly.
	for (size_t j = 0; j < b->size; j++)
	{
		print_bits(b->values[j].key, b->depth);
		printf(" \\| %4" PRIu64 " = %4" PRIu64 "\\l", b->values[j].key, b->values[j].value);
	}
	printf("\"]\n");
}
// Dump the whole table (directory node, bucket-pointer record, and each
// bucket) as a graphviz digraph on stdout, for debugging/visualization.
void print_dir_graphviz(hash_ctx_t* ctx) {
	printf("digraph hash {\n\tnode[shape = record ]; \n");
	// The original passed size_t page_size to %i — a format/argument
	// mismatch (undefined behavior). %zu / PRIu32 now match the types.
	printf("\ttable [label=\"Depth: %i, Size: %" PRIu32 "\\lPages: %" PRIu32 ", Page Size: %zu\\l\"]\n",
		ctx->dir->depth, ctx->dir->size, ctx->dir->allocated_pages, ctx->page_size);
	printf("\tbuckets [label=\"");
	for (size_t i = 0; i < ctx->dir->size; i++)
	{
		if (i != 0)
			printf("|");
		printf("<bucket_%zu> %zu - %p ", i, i, (void*)ctx->dir->buckets[i]);
	}
	printf("\"]\n");
	for (size_t i = 0; i < ctx->dir->size; i++)
	{
		print_bucket(ctx->dir->buckets[i], (uint32_t)i);
	}
	for (size_t i = 0; i < ctx->dir->size; i++) {
		printf("\tbuckets:bucket_%zu -> bucket_%p;\n", i, (void*)ctx->dir->buckets[i]);
	}
	printf("\ttable->buckets;\n}\n");
}
// Map a hash to its directory slot by keeping the low `depth` bits.
uint32_t hash_table_bucket_number(hash_ctx_t* ctx, uint64_t h) {
	uint64_t mask = ((uint64_t)1 << ctx->dir->depth) - 1;
	return (uint32_t)(h & mask);
}
// Number of bucket-pointer slots the current directory allocation can
// hold. The directory stores hash_bucket_t* entries, so the usable
// space must be divided by the pointer size — the original divided by
// sizeof(hash_bucket_t), which only happens to give the same answer on
// 64-bit targets where both are 8 bytes.
size_t hash_table_get_directory_capacity(hash_ctx_t* ctx) {
	size_t usable = (ctx->dir->allocated_pages * ctx->page_size) - sizeof(hash_directory_t);
	return usable / sizeof(hash_bucket_t*);
}
// Allocate a single-page bucket initialized empty at the directory's
// current global depth. Returns NULL when the page allocation fails.
hash_bucket_t* create_hash_bucket(hash_ctx_t* ctx) {
	hash_bucket_t* bucket = ctx->allocate_page(1);
	if (bucket == NULL)
		return NULL;
	// Capacity is whatever fits in the page after the bucket header.
	size_t datum_space = ctx->page_size - sizeof(hash_bucket_t);
	bucket->capacity = (uint16_t)(datum_space / sizeof(datum_t));
	bucket->size = 0;
	bucket->depth = ctx->dir->depth;
	return bucket;
}
// Look up `key`; on success writes its value into *value and returns
// true, otherwise returns false and leaves *value untouched.
bool hash_table_get(hash_ctx_t* ctx, uint64_t key, uint64_t* value) {
	uint32_t slot = hash_table_bucket_number(ctx, key);
	hash_bucket_t* bucket = ctx->dir->buckets[slot];
	for (size_t idx = 0; idx < bucket->size; idx++)
	{
		if (bucket->values[idx].key != key)
			continue;
		*value = bucket->values[idx].value;
		return true;
	}
	return false;
}
// Insert or update (key, value). Returns false on allocation failure.
// When the target bucket overflows it is split on the next key bit
// (doubling the directory first if the bucket is already at global
// depth), entries are redistributed, and the insert is retried.
// BUG FIX: the original inserted the new datum unconditionally after a
// split; a skewed redistribution (all entries on one side) left the
// target bucket still full and the write overflowed the page. Retrying
// via recursion splits again as needed instead of overflowing.
bool hash_table_put(hash_ctx_t* ctx, uint64_t key, uint64_t value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	// Existing key: update in place.
	for (size_t i = 0; i < b->size; i++)
	{
		if (b->values[i].key == key) {
			b->values[i].value = value;
			return true;
		}
	}
	// Fast path: room left in the bucket.
	if (b->capacity > b->size) {
		b->values[b->size++] = (datum_t){ key, value };
		return true;
	}
	// Bucket is full. If its local depth equals the global depth, the
	// directory must double before the bucket can split.
	if (ctx->dir->depth == b->depth) {
		hash_directory_t* new_dir;
		if (ctx->dir->size * 2 > hash_table_get_directory_capacity(ctx)) {
			// Not enough room in the current allocation — grow it.
			new_dir = ctx->allocate_page(ctx->dir->allocated_pages * 2);
			if (new_dir == NULL)
				return false;
			size_t dir_size = ctx->dir->allocated_pages * ctx->page_size;
			memcpy(new_dir, ctx->dir, dir_size);
			new_dir->allocated_pages *= 2;
		}
		else {
			new_dir = ctx->dir; // enough slack to double in place
		}
		// Duplicate the bucket-pointer array into the second half; each
		// bucket is now referenced from two directory slots.
		size_t buckets_size = ctx->dir->size * sizeof(hash_bucket_t*);
		memcpy((char*)new_dir + sizeof(hash_directory_t) + buckets_size,
			(char*)ctx->dir + sizeof(hash_directory_t), buckets_size);
		new_dir->depth++;
		new_dir->size = ctx->dir->size * 2;
		if (new_dir != ctx->dir) {
			ctx->release_page(ctx->dir);
			ctx->dir = new_dir;
		}
	}
	// Split the full bucket: allocate its sibling plus a scratch copy.
	hash_bucket_t* n = create_hash_bucket(ctx);
	if (!n)
		return false;
	hash_bucket_t* tmp = ctx->allocate_page(1);
	if (!tmp) {
		ctx->release_page(n);
		// A directory grown above stays in place: it is already wired
		// into the table and will be freed with the whole table.
		return false;
	}
	memcpy(tmp, b, ctx->page_size);
	n->depth = b->depth = (uint8_t)(tmp->depth + 1);
	b->size = 0;
	// The newly significant key bit decides which sibling gets an entry.
	// (64-bit shift: the original's `1 << depth` was int-width.)
	uint64_t bit = (uint64_t)1 << tmp->depth;
	for (size_t i = 0; i < tmp->size; i++)
	{
		hash_bucket_t* p = tmp->values[i].key & bit ? n : b;
		p->values[p->size++] = tmp->values[i];
	}
	ctx->release_page(tmp);
	// Re-point every directory slot that mapped to the old bucket: the
	// slots sharing key's low (depth) bits, stepped by the split bit.
	for (size_t i = key & (bit - 1); i < ctx->dir->size; i += bit)
	{
		ctx->dir->buckets[i] = i & bit ? n : b;
	}
	// Retry the insert; recursion handles a still-full target bucket.
	return hash_table_put(ctx, key, value);
}
// Initialize a fresh table: a one-page directory at global depth 1
// with two freshly allocated buckets. Returns false on allocation
// failure (all partial allocations are released).
bool hash_table_init(hash_ctx_t* ctx) {
	hash_directory_t* dir = ctx->allocate_page(1);
	if (dir == NULL)
		return false;
	dir->size = 2;
	dir->allocated_pages = 1;
	dir->depth = 1;
	// Wire the directory first: create_hash_bucket reads ctx->dir->depth.
	ctx->dir = dir;
	dir->buckets[0] = create_hash_bucket(ctx);
	dir->buckets[1] = create_hash_bucket(ctx);
	if (dir->buckets[0] == NULL || dir->buckets[1] == NULL) {
		ctx->release_page(dir->buckets[0]);
		ctx->release_page(dir->buckets[1]);
		ctx->release_page(dir);
		return false;
	}
	return true;
}
// Allocate `n` contiguous 4KB pages aligned to a 4KB boundary.
// Returns NULL on failure.
// The original called _aligned_malloc unconditionally, which is
// MSVC-only; on other platforms use C11 aligned_alloc (its size
// argument must be a multiple of the alignment — 4096 * n always is).
void* allocate_4k_page(uint32_t n) {
#ifdef _WIN32
	return _aligned_malloc(4096 * (size_t)n, 4096);
#else
	return aligned_alloc(4096, 4096 * (size_t)n);
#endif
}
// Release a block obtained from allocate_4k_page. Safe to call with
// NULL. The original called _aligned_free unconditionally (MSVC-only);
// aligned_alloc memory is released with plain free elsewhere, and
// free(NULL) is already a no-op, so no guard is needed there.
void release_4k_page(void* p) {
#ifdef _WIN32
	if (p != NULL)
		_aligned_free(p);
#else
	free(p);
#endif
}
int main() | |
{ | |
const uint32_t size = 31; | |
struct hash_ctx ctx = { 256, allocate_4k_page, release_4k_page }; | |
if (!hash_table_init(&ctx)) { | |
printf("Failed to init\n"); | |
return -1; | |
} | |
for (size_t i = 0; i < size; i++) | |
{ | |
if (!hash_table_put(&ctx, i, 100ull + i)) { | |
printf("Failed to put\n"); | |
return -1; | |
} | |
for (size_t x = 0; x < i; x++) | |
{ | |
uint64_t val; | |
if (!hash_table_get(&ctx, x, &val)) { | |
printf("Failed to get %i\n", x); | |
print_dir_graphviz(&ctx); | |
return -1; | |
} | |
} | |
} | |
print_dir_graphviz(&ctx); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment