Created
November 14, 2019 19:09
-
-
Save ayende/5aaa6500645ad25228ab43c96f8f849e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <malloc.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A single key/value entry stored inside a bucket.
typedef struct datum {
	uint64_t key;
	uint64_t value;
} datum_t;
typedef struct hash_bucket { | |
uint16_t size; | |
uint16_t capacity; | |
uint8_t depth; | |
datum_t values[0]; | |
} hash_bucket_t; | |
typedef struct hash_directory { | |
uint32_t size; | |
uint32_t allocated_pages; | |
uint8_t depth; | |
hash_bucket_t* buckets[0]; | |
} hash_directory_t; | |
// Table context: page-allocation callbacks plus the current directory.
typedef struct hash_ctx {
	size_t page_size;                   // logical page size used for buckets and directory growth
	void* (*allocate_page)(uint32_t n); // allocate n contiguous pages; NULL on failure
	void (*release_page)(void* p);      // release an allocation from allocate_page
	hash_directory_t* dir;              // current directory (replaced when the directory doubles)
} hash_ctx_t;
// Print v in binary on stdout, suppressing leading zero bits except
// that the low 8 bits and the low `depth` bits are always shown; a
// space is emitted after the bit at position `depth` to visually
// separate the bits used for bucket selection.
// NOTE(review): the original read the value through a byte pointer and
// so depended on little-endian layout; this shift-based version is
// portable and produces identical output on little-endian hosts.
void print_bits(uint64_t v, int depth)
{
	bool seen_one = false;
	for (int pos = 63; pos >= 0; pos--)
	{
		unsigned bit = (unsigned)((v >> pos) & 1);
		if (bit)
			seen_one = true;
		// Skip leading zeros, but never the low-8 or low-depth bits.
		if (seen_one || pos <= depth || pos <= 8)
			printf("%u", bit);
		if (pos == depth)
			printf(" ");
	}
}
// Emit one bucket as a graphviz record node: header (local depth, size,
// capacity) followed by one line per entry showing the key's bits split
// at the bucket depth, then the key/value pair.
// The directory-index parameter `i` is currently unused (the node is
// keyed by the bucket's address); kept for interface compatibility.
void print_bucket(hash_bucket_t* b, uint32_t i) {
	(void)i;
	printf("\tbucket_%p [label=\"Depth: %i, Size: %i\\lCapacity: %i\\l--------\\l",
		(void*)b, b->depth, b->size, b->capacity);
	// Original shadowed the parameter with a loop variable also named i;
	// renamed, and %llu replaced with PRIu64 to match uint64_t exactly.
	for (size_t j = 0; j < b->size; j++)
	{
		print_bits(b->values[j].key, b->depth);
		printf(" \\| %4" PRIu64 " = %4" PRIu64 "\\l", b->values[j].key, b->values[j].value);
	}
	printf("\"]\n");
}
// Dump the whole table (directory node, bucket-pointer record, and each
// bucket) as a graphviz digraph on stdout, for debugging/visualization.
void print_dir_graphviz(hash_ctx_t* ctx) {
	printf("digraph hash {\n\tnode[shape = record ]; \n");
	// The original passed size_t page_size to %i — a format/argument
	// mismatch (undefined behavior). %zu / PRIu32 now match the types.
	printf("\ttable [label=\"Depth: %i, Size: %" PRIu32 "\\lPages: %" PRIu32 ", Page Size: %zu\\l\"]\n",
		ctx->dir->depth, ctx->dir->size, ctx->dir->allocated_pages, ctx->page_size);
	printf("\tbuckets [label=\"");
	for (size_t i = 0; i < ctx->dir->size; i++)
	{
		if (i != 0)
			printf("|");
		printf("<bucket_%zu> %zu - %p ", i, i, (void*)ctx->dir->buckets[i]);
	}
	printf("\"]\n");
	for (size_t i = 0; i < ctx->dir->size; i++)
	{
		print_bucket(ctx->dir->buckets[i], (uint32_t)i);
	}
	for (size_t i = 0; i < ctx->dir->size; i++) {
		printf("\tbuckets:bucket_%zu -> bucket_%p;\n", i, (void*)ctx->dir->buckets[i]);
	}
	printf("\ttable->buckets;\n}\n");
}
// Map a hash to its directory slot by keeping the low `depth` bits.
uint32_t hash_table_bucket_number(hash_ctx_t* ctx, uint64_t h) {
	uint64_t mask = ((uint64_t)1 << ctx->dir->depth) - 1;
	return (uint32_t)(h & mask);
}
// Number of bucket-pointer slots the current directory allocation can
// hold. The directory stores hash_bucket_t* entries, so the usable
// space must be divided by the pointer size — the original divided by
// sizeof(hash_bucket_t), which only happens to give the same answer on
// 64-bit targets where both are 8 bytes.
size_t hash_table_get_directory_capacity(hash_ctx_t* ctx) {
	size_t usable = (ctx->dir->allocated_pages * ctx->page_size) - sizeof(hash_directory_t);
	return usable / sizeof(hash_bucket_t*);
}
// Allocate a single-page bucket initialized empty at the directory's
// current global depth. Returns NULL when the page allocation fails.
hash_bucket_t* create_hash_bucket(hash_ctx_t* ctx) {
	hash_bucket_t* bucket = ctx->allocate_page(1);
	if (bucket == NULL)
		return NULL;
	// Capacity is whatever fits in the page after the bucket header.
	size_t datum_space = ctx->page_size - sizeof(hash_bucket_t);
	bucket->capacity = (uint16_t)(datum_space / sizeof(datum_t));
	bucket->size = 0;
	bucket->depth = ctx->dir->depth;
	return bucket;
}
// Look up `key`; on success writes its value into *value and returns
// true, otherwise returns false and leaves *value untouched.
bool hash_table_get(hash_ctx_t* ctx, uint64_t key, uint64_t* value) {
	uint32_t slot = hash_table_bucket_number(ctx, key);
	hash_bucket_t* bucket = ctx->dir->buckets[slot];
	for (size_t idx = 0; idx < bucket->size; idx++)
	{
		if (bucket->values[idx].key != key)
			continue;
		*value = bucket->values[idx].value;
		return true;
	}
	return false;
}
// Insert or update (key, value). Returns false on allocation failure.
// When the target bucket overflows it is split on the next key bit
// (doubling the directory first if the bucket is already at global
// depth), entries are redistributed, and the insert is retried.
// BUG FIX: the original inserted the new datum unconditionally after a
// split; a skewed redistribution (all entries on one side) left the
// target bucket still full and the write overflowed the page. Retrying
// via recursion splits again as needed instead of overflowing.
bool hash_table_put(hash_ctx_t* ctx, uint64_t key, uint64_t value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	// Existing key: update in place.
	for (size_t i = 0; i < b->size; i++)
	{
		if (b->values[i].key == key) {
			b->values[i].value = value;
			return true;
		}
	}
	// Fast path: room left in the bucket.
	if (b->capacity > b->size) {
		b->values[b->size++] = (datum_t){ key, value };
		return true;
	}
	// Bucket is full. If its local depth equals the global depth, the
	// directory must double before the bucket can split.
	if (ctx->dir->depth == b->depth) {
		hash_directory_t* new_dir;
		if (ctx->dir->size * 2 > hash_table_get_directory_capacity(ctx)) {
			// Not enough room in the current allocation — grow it.
			new_dir = ctx->allocate_page(ctx->dir->allocated_pages * 2);
			if (new_dir == NULL)
				return false;
			size_t dir_size = ctx->dir->allocated_pages * ctx->page_size;
			memcpy(new_dir, ctx->dir, dir_size);
			new_dir->allocated_pages *= 2;
		}
		else {
			new_dir = ctx->dir; // enough slack to double in place
		}
		// Duplicate the bucket-pointer array into the second half; each
		// bucket is now referenced from two directory slots.
		size_t buckets_size = ctx->dir->size * sizeof(hash_bucket_t*);
		memcpy((char*)new_dir + sizeof(hash_directory_t) + buckets_size,
			(char*)ctx->dir + sizeof(hash_directory_t), buckets_size);
		new_dir->depth++;
		new_dir->size = ctx->dir->size * 2;
		if (new_dir != ctx->dir) {
			ctx->release_page(ctx->dir);
			ctx->dir = new_dir;
		}
	}
	// Split the full bucket: allocate its sibling plus a scratch copy.
	hash_bucket_t* n = create_hash_bucket(ctx);
	if (!n)
		return false;
	hash_bucket_t* tmp = ctx->allocate_page(1);
	if (!tmp) {
		ctx->release_page(n);
		// A directory grown above stays in place: it is already wired
		// into the table and will be freed with the whole table.
		return false;
	}
	memcpy(tmp, b, ctx->page_size);
	n->depth = b->depth = (uint8_t)(tmp->depth + 1);
	b->size = 0;
	// The newly significant key bit decides which sibling gets an entry.
	// (64-bit shift: the original's `1 << depth` was int-width.)
	uint64_t bit = (uint64_t)1 << tmp->depth;
	for (size_t i = 0; i < tmp->size; i++)
	{
		hash_bucket_t* p = tmp->values[i].key & bit ? n : b;
		p->values[p->size++] = tmp->values[i];
	}
	ctx->release_page(tmp);
	// Re-point every directory slot that mapped to the old bucket: the
	// slots sharing key's low (depth) bits, stepped by the split bit.
	for (size_t i = key & (bit - 1); i < ctx->dir->size; i += bit)
	{
		ctx->dir->buckets[i] = i & bit ? n : b;
	}
	// Retry the insert; recursion handles a still-full target bucket.
	return hash_table_put(ctx, key, value);
}
// Initialize a fresh table: a one-page directory at global depth 1
// with two freshly allocated buckets. Returns false on allocation
// failure (all partial allocations are released).
bool hash_table_init(hash_ctx_t* ctx) {
	hash_directory_t* dir = ctx->allocate_page(1);
	if (dir == NULL)
		return false;
	dir->size = 2;
	dir->allocated_pages = 1;
	dir->depth = 1;
	// Wire the directory first: create_hash_bucket reads ctx->dir->depth.
	ctx->dir = dir;
	dir->buckets[0] = create_hash_bucket(ctx);
	dir->buckets[1] = create_hash_bucket(ctx);
	if (dir->buckets[0] == NULL || dir->buckets[1] == NULL) {
		ctx->release_page(dir->buckets[0]);
		ctx->release_page(dir->buckets[1]);
		ctx->release_page(dir);
		return false;
	}
	return true;
}
// Allocate `n` contiguous 4KB pages aligned to a 4KB boundary.
// Returns NULL on failure.
// The original called _aligned_malloc unconditionally, which is
// MSVC-only; on other platforms use C11 aligned_alloc (its size
// argument must be a multiple of the alignment — 4096 * n always is).
void* allocate_4k_page(uint32_t n) {
#ifdef _WIN32
	return _aligned_malloc(4096 * (size_t)n, 4096);
#else
	return aligned_alloc(4096, 4096 * (size_t)n);
#endif
}
// Release a block obtained from allocate_4k_page. Safe to call with
// NULL. The original called _aligned_free unconditionally (MSVC-only);
// aligned_alloc memory is released with plain free elsewhere, and
// free(NULL) is already a no-op, so no guard is needed there.
void release_4k_page(void* p) {
#ifdef _WIN32
	if (p != NULL)
		_aligned_free(p);
#else
	free(p);
#endif
}
int main() | |
{ | |
const uint32_t size = 31; | |
struct hash_ctx ctx = { 256, allocate_4k_page, release_4k_page }; | |
if (!hash_table_init(&ctx)) { | |
printf("Failed to init\n"); | |
return -1; | |
} | |
for (size_t i = 0; i < size; i++) | |
{ | |
if (!hash_table_put(&ctx, i, 100ull + i)) { | |
printf("Failed to put\n"); | |
return -1; | |
} | |
for (size_t x = 0; x < i; x++) | |
{ | |
uint64_t val; | |
if (!hash_table_get(&ctx, x, &val)) { | |
printf("Failed to get %i\n", x); | |
print_dir_graphviz(&ctx); | |
return -1; | |
} | |
} | |
} | |
print_dir_graphviz(&ctx); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment