ayende/ehash.c

## ehash.c
#include <stdlib.h>
#include <stdio.h>
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>
#include <stdbool.h>

#include "wyhash.h" // https://github.com/wangyi-fudan/wymlp/blob/master/wyhash.h

#define VALIDATE 0

#define HASH_BUCKET_PAGE_SIZE			   8192
#define NUMBER_OF_HASH_BUCKET_PIECES		127
#define PIECE_BUCKET_BUFFER_SIZE			 63

typedef struct hash_bucket_piece {
	uint8_t overflowed: 1;
	uint8_t bytes_used: 7;
	uint8_t data[PIECE_BUCKET_BUFFER_SIZE];
} hash_bucket_piece_t;

static_assert(sizeof(hash_bucket_piece_t) == 64, "hash_bucket_piece_t is expected to be 64 bytes exactly");

typedef struct hash_bucket {
	union {
		struct {
			uint64_t number_of_entries;
			uint8_t depth;
			bool seen;
		};
		uint8_t _padding[64];
	};
	hash_bucket_piece_t pieces[NUMBER_OF_HASH_BUCKET_PIECES];
} hash_bucket_t;

typedef struct hash_directory {
	uint64_t number_of_entries;
	uint32_t number_of_buckets;
	uint32_t directory_pages;
	uint8_t depth;
	hash_bucket_t* buckets[0];
} hash_directory_t;

typedef struct hash_ctx {
	void* (*allocate_page)(uint32_t n);
	void (*release_page)(void* p);
	hash_directory_t* dir;
} hash_ctx_t;

uint64_t hash(uint64_t x) {
	x = (x ^ (x >> 30))* UINT64_C(0xbf58476d1ce4e5b9);
	x = (x ^ (x >> 27))* UINT64_C(0x94d049bb133111eb);
	x = x ^ (x >> 31);
	return x;
}

void varint_decode(uint8_t** buf, uint64_t* val) {
	uint64_t result = 0;
	uint32_t shift = 0;
	uint8_t* ptr = *buf;
	uint8_t cur = 0;
	do
	{
		cur = *ptr++;
		result |= (uint64_t)(cur & 0x7f) << shift;
		shift += 7;
	} while (cur & 0x80);
	*val = result;
	*buf = ptr;
}

void varint_encode(uint64_t val, uint8_t** buf) {
	uint8_t* ptr = *buf;
	while (val >= 0x80) {
		*ptr++ = ((uint8_t)val | 0x80);
		val >>= 7;
	}
	*ptr++ = (uint8_t)(val);
	*buf = ptr;
}

void print_bits(FILE* fd, uint64_t v, int depth)
{
	uint8_t* b = (uint8_t*)&v;
	uint8_t byte;
	int i, j;
	int bits = sizeof(uint64_t) * 8;
	bool foundNonZero = false;
	for (i = sizeof(uint64_t) - 1; i >= 0; i--)
	{
		for (j = 7; j >= 0; j--)
		{
			bits--;
			byte = (b[i] >> j) & 1;
			if (byte) {
				foundNonZero = true;
			}
			if (!foundNonZero && bits > depth&& bits > 8)
				continue;
			fprintf(fd, "%u", byte);
			if (bits == depth)
				fprintf(fd, " ");
		}
	}
}

void print_hash_stats(hash_ctx_t* ctx) {

	printf("Depth: %i - Entries: %I64u, Buckets: %i \n", ctx->dir->depth, ctx->dir->number_of_entries, ctx->dir->number_of_buckets);
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
		ctx->dir->buckets[i]->seen = false;

	uint32_t min = 64, max = 0, total = 0,empties=0;
	uint64_t sum = 0;

	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
		hash_bucket_t* b = ctx->dir->buckets[i];
		if (b->seen)
			continue;
		b->seen = false;
		size_t total_used = 0;
		for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
			total_used += b->pieces[i].bytes_used;
			if (b->pieces[i].bytes_used)
				min = min < b->pieces[i].bytes_used ? min : b->pieces[i].bytes_used;
			else
				empties++;
			max = max > b->pieces[i].bytes_used ? max : b->pieces[i].bytes_used;
			sum += b->pieces[i].bytes_used;
			total++;
		}
		//printf("%p - Depth: %i, Entries: %I64u, Size: %i\n", b, b->depth, b->number_of_entries, total_used);
	}
	printf("Total: %i, Min: %i, Max: %iu, Sum: %llu, Empties: %i, Avg: %f\n", total, min, max, sum, empties, sum / (float)total);
}

void print_bucket(FILE* fd, hash_bucket_t* b, uint32_t idx) {
	size_t total_used = 0;
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
		total_used += b->pieces[i].bytes_used;
	}
	fprintf(fd, "\tbucket_%p [label=\"Depth: %i, Entries: %I64u, Size: %i, Index: %i\\l--------\\l", b, b->depth, b->number_of_entries, total_used, idx);
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
		hash_bucket_piece_t* p = &b->pieces[i];
		uint8_t* buf = p->data;
		uint8_t* end = p->data + p->bytes_used;
		while (buf < end)
		{
			uint64_t k, v;
			varint_decode(&buf, &k);
			varint_decode(&buf, &v);

			print_bits(fd, k, b->depth);
			fprintf(fd, " \\| %4llu = %4llu\\l", k, v);
		}
	}
	fprintf(fd, "\"]\n");
}

void print_dir_graphviz_to_file(FILE* fd, hash_ctx_t* ctx) {
	fprintf(fd, "digraph hash {\n\tnode[shape = record ]; \n");
	fprintf(fd, "\ttable [label=\"Depth: %i, Size: %i\\lPages: %i, Entries: %I64u\\l\"]\n", ctx->dir->depth, ctx->dir->number_of_buckets, ctx->dir->directory_pages, ctx->dir->number_of_entries);
	fprintf(fd, "\tbuckets [label=\"");
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
		if (i != 0)
			fprintf(fd, "|");
		fprintf(fd, "<bucket_%i> %i - %p ", i, i, ctx->dir->buckets[i]);
		ctx->dir->buckets[i]->seen = false;
	}
	fprintf(fd, "\"]\n");
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
		if (ctx->dir->buckets[i]->seen)
			continue;
		ctx->dir->buckets[i]->seen = true;
		print_bucket(fd, ctx->dir->buckets[i], i);
	}

	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++) {
		fprintf(fd, "\tbuckets:bucket_%i -> bucket_%p;\n", i, ctx->dir->buckets[i]);
	}
	fprintf(fd, "\ttable->buckets;\n}\n");
}

void print_dir_graphviz(hash_ctx_t* ctx) {
	print_dir_graphviz_to_file(stdout, ctx);
}

void write_dir_graphviz(hash_ctx_t* ctx, const char* prefix) {
	static int counter = 0;
	uint8_t buffer[256];
	memset(buffer, 0, sizeof buffer);
	counter++;
	sprintf_s(buffer, sizeof buffer, "%s-%i.txt", prefix, counter);
	FILE* f = NULL;
	errno_t err = fopen_s(&f, buffer, "w");
	if (err)
	{
		printf("Failed to open %s - %i", buffer, err);
		return;
	}
	print_dir_graphviz_to_file(f, ctx);
	if (f != 0)
		fclose(f);

	char* copy = _strdup(buffer);
	memset(buffer, 0, sizeof buffer);
	sprintf_s(buffer, sizeof buffer, "C:\\Users\\ayende\\Downloads\\graphviz-2.38\\release\\bin\\dot.exe  -Tsvg %s > %s.svg", copy, copy);
	system(buffer);
	memset(buffer, 0, sizeof buffer);
	sprintf_s(buffer, sizeof buffer, "%s.svg", copy);
	system(buffer);
	free(copy);
}

uint32_t hash_table_bucket_number(hash_ctx_t* ctx, uint64_t h) {
	return h & (((uint64_t)1 << ctx->dir->depth) - 1);
}

size_t hash_table_get_directory_capacity(hash_ctx_t* ctx) {
	return ((ctx->dir->directory_pages * HASH_BUCKET_PAGE_SIZE) - sizeof(hash_directory_t)) / sizeof(hash_bucket_t*);
}

hash_bucket_t* create_hash_bucket(hash_ctx_t* ctx) {
	hash_bucket_t* b = ctx->allocate_page(1);
	if (b == NULL)
		return NULL;

	memset(b, 0, sizeof(hash_bucket_t));
	b->depth = ctx->dir->depth;
	return b;
}

bool hash_table_get(hash_ctx_t* ctx, uint64_t key, uint64_t* value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
		uint8_t* buf = p->data;
		uint8_t* end = p->data + p->bytes_used;
		while (buf < end)
		{
			uint64_t k, v;
			varint_decode(&buf, &k);
			varint_decode(&buf, &v);
			if (k == key) {
				*value = v;
				return true;
			}
		}
		if (!p->overflowed)
			break;
	}

	return false;
}

bool hash_table_piece_append_kv(hash_bucket_t* cur, uint64_t key, uint64_t value) {
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;
	uint8_t buffer[20];
	uint8_t* write_buf = buffer;
	varint_encode(key, &write_buf);
	varint_encode(value, &write_buf);
	uint8_t size = (uint8_t)(write_buf - buffer);

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		hash_bucket_piece_t* p = &cur->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
		if (size + p->bytes_used > PIECE_BUCKET_BUFFER_SIZE) {
			p->overflowed = true;
			continue;
		}
		memcpy(p->data + p->bytes_used, buffer, size);
		p->bytes_used += size;
		cur->number_of_entries++;
		return true;
	}
	return false;
}

void validate_bucket(hash_ctx_t* ctx, hash_bucket_t* tmp) {
#if VALIDATE
	uint64_t mask = ((uint64_t)1 << tmp->depth) - 1;
	uint64_t first = 0;
	bool has_first = false;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		uint8_t* buf = tmp->pieces[i].data;
		uint8_t* end = buf + tmp->pieces[i].bytes_used;
		while (buf < end)
		{
			uint64_t k, v;
			varint_decode(&buf, &k);
			varint_decode(&buf, &v);

			if (has_first == false)
			{
				first = k;
				has_first = true;
			}

			if ((k & mask) != (first & mask)) {
				write_dir_graphviz(ctx, "problem");
				printf("Problem\n");
			}
		}
	}
#endif
}

bool hash_table_put(hash_ctx_t* ctx, uint64_t key, uint64_t value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;

	uint8_t tmp_buffer[20]; // each varint can take up to 10 bytes
	uint8_t* buf_end = tmp_buffer;
	varint_encode(key, &buf_end);
	varint_encode(value, &buf_end);
	ptrdiff_t encoded_size = buf_end - tmp_buffer;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
		uint8_t* buf = p->data;
		uint8_t* end = p->data + p->bytes_used;
		while (buf < end)
		{
			uint64_t k, v;
			uint8_t* cur_buf_start = buf;
			varint_decode(&buf, &k);
			varint_decode(&buf, &v);

			if (k == key) {
				if (v == value)
					return true; // nothing to do, value is already there
				ptrdiff_t diff = buf - cur_buf_start;
				if (diff == encoded_size) {
					// new value fit exactly where the old one went, let's put it there
					memcpy(cur_buf_start, tmp_buffer, encoded_size);
					validate_bucket(ctx, b);
					return true;
				}

				memmove(cur_buf_start, buf, end - buf);
				p->bytes_used -= (uint8_t)diff;
				b->number_of_entries--;
				ctx->dir->number_of_entries--;
				break; // found it, let's put this in
			}
		}

		if (p->bytes_used + encoded_size <= PIECE_BUCKET_BUFFER_SIZE) {
			memcpy(p->data + p->bytes_used, tmp_buffer, encoded_size);
			p->bytes_used += (uint8_t)encoded_size;
			b->number_of_entries++;
			ctx->dir->number_of_entries++;

			validate_bucket(ctx, b);
			return true; // was able to update the value in the same piece, done
		}

		// if we are looking at an overflow page, move to the next one and try to find it there
		if (!p->overflowed)
			break;
	}
	// couldn't find it in the proper place, let's put it in overflow
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
		if (p->bytes_used + encoded_size <= PIECE_BUCKET_BUFFER_SIZE) {
			memcpy(p->data + p->bytes_used, tmp_buffer, encoded_size);
			p->bytes_used += (uint8_t)encoded_size;
			b->number_of_entries++;
			ctx->dir->number_of_entries++;

			validate_bucket(ctx, b);
			return true; // was able to update the value in the same piece, done
		}
		p->overflowed = true;
	}

	// there is no room here, need to expand

	if (ctx->dir->depth == b->depth) {
		hash_directory_t* new_dir;
		if (ctx->dir->number_of_buckets * 2 * sizeof(hash_bucket_t*) > hash_table_get_directory_capacity(ctx)) {

			// have to increase the actual allocated memory here
			new_dir = ctx->allocate_page(ctx->dir->directory_pages * 2);
			if (new_dir == NULL)
				return false;

			size_t dir_size = ctx->dir->directory_pages * HASH_BUCKET_PAGE_SIZE;
			memcpy(new_dir, ctx->dir, dir_size);
			new_dir->directory_pages *= 2;
		}
		else {
			new_dir = ctx->dir; // there is enough space to increase size without allocations
		}
		size_t buckets_size = ctx->dir->number_of_buckets * sizeof(hash_bucket_t*);
		memcpy((uint8_t*)new_dir->buckets + buckets_size, (uint8_t*)ctx->dir->buckets, buckets_size);
		new_dir->depth++;
		new_dir->number_of_buckets *= 2;
		if (new_dir != ctx->dir) {
			ctx->release_page(ctx->dir);
			ctx->dir = new_dir;
		}
	}
	//write_dir_graphviz(ctx, "BEFORE");
	hash_bucket_t* n = create_hash_bucket(ctx);
	if (!n)
		return false;

	hash_bucket_t* tmp = ctx->allocate_page(1);
	if (!tmp) {
		ctx->release_page(n);
		ctx->release_page(tmp);
		// no need to release the ctx->dir we allocated, was wired
		// properly to the table and will be freed with the whole table
		return false;
	}
	memcpy(tmp, b, HASH_BUCKET_PAGE_SIZE);
	memset(b, 0, sizeof(hash_bucket_t));
	memset(n, 0, sizeof(hash_bucket_t));
	n->depth = b->depth = tmp->depth + 1;

	uint32_t bit = 1 << tmp->depth;

#if VALIDATE
	uint64_t mask = (bit >> 1) - 1;
	bool has_first = false;
	uint64_t first_key = 0;
#endif

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
		uint8_t* buf = tmp->pieces[i].data;
		uint8_t* end = buf + tmp->pieces[i].bytes_used;
		while (buf < end)
		{
			uint64_t k, v;
			varint_decode(&buf, &k);
			varint_decode(&buf, &v);
#if VALIDATE
			if (!has_first) {
				first_key = k;
				has_first = true;
			}

			if ((first_key & mask) != (k & mask)) {
				printf("mistmatch!: %I64u != %I64u\n", first_key, k);
			}
#endif

			hash_bucket_t* cur = k & bit ? n : b;
			bool success = hash_table_piece_append_kv(cur, k, v);
#if VALIDATE
			if (!success)
				printf("Can't split a page properly? Impossible");
#endif
		}
	}
	ctx->release_page(tmp);

	for (size_t i = key & (bit - 1); i < ctx->dir->number_of_buckets; i += bit)
	{
		ctx->dir->buckets[i] = i & bit ? n : b;
	}

	// now can add the new value in...
	{
		hash_bucket_t* cur = key & bit ? n : b;
		if (hash_table_piece_append_kv(cur, key, value)) {
			ctx->dir->number_of_entries++;
		}
		else {
			if (!hash_table_put(ctx, key, value)) {
#if VALIDATE
				printf("Unable to recursively add? That really should never happen.");
#endif
				return false;
			}
		}
	}

	validate_bucket(ctx, n);
	validate_bucket(ctx, b);
	return true;
}


bool hash_table_init(hash_ctx_t* ctx) {
	ctx->dir = ctx->allocate_page(1);
	if (ctx->dir == NULL)
		return false;

	memset(ctx->dir, 0, HASH_BUCKET_PAGE_SIZE);
	ctx->dir->number_of_entries = 0;
	ctx->dir->number_of_buckets = 2;
	ctx->dir->directory_pages = 1;
	ctx->dir->depth = 1;

	ctx->dir->buckets[0] = create_hash_bucket(ctx);
	ctx->dir->buckets[1] = create_hash_bucket(ctx);

	if (!ctx->dir->buckets[0] || !ctx->dir->buckets[1]) {
		ctx->release_page(ctx->dir->buckets[0]);
		ctx->release_page(ctx->dir->buckets[1]);
		ctx->release_page(ctx->dir);
		return false;
	}

	return true;
}

int allocations = 0;

void* allocate_4k_page(uint32_t n) {
	allocations++;
	return _aligned_malloc(HASH_BUCKET_PAGE_SIZE * n, HASH_BUCKET_PAGE_SIZE);
}

void release_4k_page(void* p) {
	if (!p)
		return;
	allocations--;
	_aligned_free(p);
}

int main()
{
	srand(19324);
	uint32_t const size = 1024 * 1024;

	uint64_t* keys = malloc(size * sizeof(uint64_t));
	uint64_t* values = malloc(size * sizeof(uint64_t));
	if (!keys || !values)
		return -1;


	for (size_t i = 0; i < size; i++)
	{
		keys[i] = wygrand();
		values[i] = wygrand();
	}

	struct hash_ctx ctx = { allocate_4k_page, release_4k_page };
	if (!hash_table_init(&ctx)) {
		printf("Failed to init\n");
		return -1;
	}

	uint64_t first_key = keys[0];
	uint64_t first_value = values[0];


	for (size_t i = 0; i < size; i++)
	{
		uint64_t v;
		if (i == 184) {
			printf("a	- ");
		}

		if (!hash_table_put(&ctx, keys[i], values[i])) {
			printf("Failed to put %i\n", i);
			write_dir_graphviz(&ctx, "PUT-ERR");
			return -1;
		}

		if (keys[i] == first_key)
			first_value = values[i];

		if (!hash_table_get(&ctx, first_key, &v) || v != first_value) {
			printf("Failed to get 0 on %i\n", i);
			write_dir_graphviz(&ctx, "PUT-ERR");
		}
	}

	//write_dir_graphviz(&ctx, "FINAL");
	for (size_t x = 0; x < size; x++)
	{
		uint64_t val;

		if (!hash_table_get(&ctx, keys[x], &val)) {
			printf("Failed to get %i\n", x);
			write_dir_graphviz(&ctx, "GET-ERR");
			return -1;
		}
		if (val != values[x]) {
			printf("mismatch on %I32u - %I64u - %I64u, %I64u \n", x, keys[x], val, values[x]);
		}
	}
	print_hash_stats(&ctx);

	printf("%f MB\n", ((double)allocations * (double)HASH_BUCKET_PAGE_SIZE) / 1024.0 / 1024.0);

	return 0;
}
	#include <stdlib.h>
	#include <stdio.h>
	#include <malloc.h>
	#include <stdint.h>
	#include <string.h>
	#include <inttypes.h>
	#include <stdbool.h>

	#include "wyhash.h" // https://github.com/wangyi-fudan/wymlp/blob/master/wyhash.h

	#define VALIDATE 0

	#define HASH_BUCKET_PAGE_SIZE 8192
	#define NUMBER_OF_HASH_BUCKET_PIECES 127
	#define PIECE_BUCKET_BUFFER_SIZE 63

	typedef struct hash_bucket_piece {
	uint8_t overflowed: 1;
	uint8_t bytes_used: 7;
	uint8_t data[PIECE_BUCKET_BUFFER_SIZE];
	} hash_bucket_piece_t;

	static_assert(sizeof(hash_bucket_piece_t) == 64, "hash_bucket_piece_t is expected to be 64 bytes exactly");

	typedef struct hash_bucket {
	union {
	struct {
	uint64_t number_of_entries;
	uint8_t depth;
	bool seen;
	};
	uint8_t _padding[64];
	};
	hash_bucket_piece_t pieces[NUMBER_OF_HASH_BUCKET_PIECES];
	} hash_bucket_t;

	typedef struct hash_directory {
	uint64_t number_of_entries;
	uint32_t number_of_buckets;
	uint32_t directory_pages;
	uint8_t depth;
	hash_bucket_t* buckets[0];
	} hash_directory_t;

	typedef struct hash_ctx {
	void* (*allocate_page)(uint32_t n);
	void (release_page)(void p);
	hash_directory_t* dir;
	} hash_ctx_t;

	uint64_t hash(uint64_t x) {
	x = (x ^ (x >> 30))* UINT64_C(0xbf58476d1ce4e5b9);
	x = (x ^ (x >> 27))* UINT64_C(0x94d049bb133111eb);
	x = x ^ (x >> 31);
	return x;
	}

	void varint_decode(uint8_t** buf, uint64_t* val) {
	uint64_t result = 0;
	uint32_t shift = 0;
	uint8_t* ptr = *buf;
	uint8_t cur = 0;
	do
	{
	cur = *ptr++;
	result \|= (uint64_t)(cur & 0x7f) << shift;
	shift += 7;
	} while (cur & 0x80);
	*val = result;
	*buf = ptr;
	}

	void varint_encode(uint64_t val, uint8_t** buf) {
	uint8_t* ptr = *buf;
	while (val >= 0x80) {
	*ptr++ = ((uint8_t)val \| 0x80);
	val >>= 7;
	}
	*ptr++ = (uint8_t)(val);
	*buf = ptr;
	}

	void print_bits(FILE* fd, uint64_t v, int depth)
	{
	uint8_t* b = (uint8_t*)&v;
	uint8_t byte;
	int i, j;
	int bits = sizeof(uint64_t) * 8;
	bool foundNonZero = false;
	for (i = sizeof(uint64_t) - 1; i >= 0; i--)
	{
	for (j = 7; j >= 0; j--)
	{
	bits--;
	byte = (b[i] >> j) & 1;
	if (byte) {
	foundNonZero = true;
	}
	if (!foundNonZero && bits > depth&& bits > 8)
	continue;
	fprintf(fd, "%u", byte);
	if (bits == depth)
	fprintf(fd, " ");
	}
	}
	}

	void print_hash_stats(hash_ctx_t* ctx) {

	printf("Depth: %i - Entries: %I64u, Buckets: %i \n", ctx->dir->depth, ctx->dir->number_of_entries, ctx->dir->number_of_buckets);
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	ctx->dir->buckets[i]->seen = false;

	uint32_t min = 64, max = 0, total = 0,empties=0;
	uint64_t sum = 0;

	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
	hash_bucket_t* b = ctx->dir->buckets[i];
	if (b->seen)
	continue;
	b->seen = false;
	size_t total_used = 0;
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
	total_used += b->pieces[i].bytes_used;
	if (b->pieces[i].bytes_used)
	min = min < b->pieces[i].bytes_used ? min : b->pieces[i].bytes_used;
	else
	empties++;
	max = max > b->pieces[i].bytes_used ? max : b->pieces[i].bytes_used;
	sum += b->pieces[i].bytes_used;
	total++;
	}
	//printf("%p - Depth: %i, Entries: %I64u, Size: %i\n", b, b->depth, b->number_of_entries, total_used);
	}
	printf("Total: %i, Min: %i, Max: %iu, Sum: %llu, Empties: %i, Avg: %f\n", total, min, max, sum, empties, sum / (float)total);
	}

	void print_bucket(FILE* fd, hash_bucket_t* b, uint32_t idx) {
	size_t total_used = 0;
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
	total_used += b->pieces[i].bytes_used;
	}
	fprintf(fd, "\tbucket_%p [label=\"Depth: %i, Entries: %I64u, Size: %i, Index: %i\\l--------\\l", b, b->depth, b->number_of_entries, total_used, idx);
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++) {
	hash_bucket_piece_t* p = &b->pieces[i];
	uint8_t* buf = p->data;
	uint8_t* end = p->data + p->bytes_used;
	while (buf < end)
	{
	uint64_t k, v;
	varint_decode(&buf, &k);
	varint_decode(&buf, &v);

	print_bits(fd, k, b->depth);
	fprintf(fd, " \\\| %4llu = %4llu\\l", k, v);
	}
	}
	fprintf(fd, "\"]\n");
	}

	void print_dir_graphviz_to_file(FILE* fd, hash_ctx_t* ctx) {
	fprintf(fd, "digraph hash {\n\tnode[shape = record ]; \n");
	fprintf(fd, "\ttable [label=\"Depth: %i, Size: %i\\lPages: %i, Entries: %I64u\\l\"]\n", ctx->dir->depth, ctx->dir->number_of_buckets, ctx->dir->directory_pages, ctx->dir->number_of_entries);
	fprintf(fd, "\tbuckets [label=\"");
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
	if (i != 0)
	fprintf(fd, "\|");
	fprintf(fd, "<bucket_%i> %i - %p ", i, i, ctx->dir->buckets[i]);
	ctx->dir->buckets[i]->seen = false;
	}
	fprintf(fd, "\"]\n");
	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++)
	{
	if (ctx->dir->buckets[i]->seen)
	continue;
	ctx->dir->buckets[i]->seen = true;
	print_bucket(fd, ctx->dir->buckets[i], i);
	}

	for (size_t i = 0; i < ctx->dir->number_of_buckets; i++) {
	fprintf(fd, "\tbuckets:bucket_%i -> bucket_%p;\n", i, ctx->dir->buckets[i]);
	}
	fprintf(fd, "\ttable->buckets;\n}\n");
	}

	void print_dir_graphviz(hash_ctx_t* ctx) {
	print_dir_graphviz_to_file(stdout, ctx);
	}

	void write_dir_graphviz(hash_ctx_t* ctx, const char* prefix) {
	static int counter = 0;
	uint8_t buffer[256];
	memset(buffer, 0, sizeof buffer);
	counter++;
	sprintf_s(buffer, sizeof buffer, "%s-%i.txt", prefix, counter);
	FILE* f = NULL;
	errno_t err = fopen_s(&f, buffer, "w");
	if (err)
	{
	printf("Failed to open %s - %i", buffer, err);
	return;
	}
	print_dir_graphviz_to_file(f, ctx);
	if (f != 0)
	fclose(f);

	char* copy = _strdup(buffer);
	memset(buffer, 0, sizeof buffer);
	sprintf_s(buffer, sizeof buffer, "C:\\Users\\ayende\\Downloads\\graphviz-2.38\\release\\bin\\dot.exe -Tsvg %s > %s.svg", copy, copy);
	system(buffer);
	memset(buffer, 0, sizeof buffer);
	sprintf_s(buffer, sizeof buffer, "%s.svg", copy);
	system(buffer);
	free(copy);
	}

	uint32_t hash_table_bucket_number(hash_ctx_t* ctx, uint64_t h) {
	return h & (((uint64_t)1 << ctx->dir->depth) - 1);
	}

	size_t hash_table_get_directory_capacity(hash_ctx_t* ctx) {
	return ((ctx->dir->directory_pages * HASH_BUCKET_PAGE_SIZE) - sizeof(hash_directory_t)) / sizeof(hash_bucket_t*);
	}

	hash_bucket_t* create_hash_bucket(hash_ctx_t* ctx) {
	hash_bucket_t* b = ctx->allocate_page(1);
	if (b == NULL)
	return NULL;

	memset(b, 0, sizeof(hash_bucket_t));
	b->depth = ctx->dir->depth;
	return b;
	}

	bool hash_table_get(hash_ctx_t* ctx, uint64_t key, uint64_t* value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
	uint8_t* buf = p->data;
	uint8_t* end = p->data + p->bytes_used;
	while (buf < end)
	{
	uint64_t k, v;
	varint_decode(&buf, &k);
	varint_decode(&buf, &v);
	if (k == key) {
	*value = v;
	return true;
	}
	}
	if (!p->overflowed)
	break;
	}

	return false;
	}

	bool hash_table_piece_append_kv(hash_bucket_t* cur, uint64_t key, uint64_t value) {
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;
	uint8_t buffer[20];
	uint8_t* write_buf = buffer;
	varint_encode(key, &write_buf);
	varint_encode(value, &write_buf);
	uint8_t size = (uint8_t)(write_buf - buffer);

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	hash_bucket_piece_t* p = &cur->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
	if (size + p->bytes_used > PIECE_BUCKET_BUFFER_SIZE) {
	p->overflowed = true;
	continue;
	}
	memcpy(p->data + p->bytes_used, buffer, size);
	p->bytes_used += size;
	cur->number_of_entries++;
	return true;
	}
	return false;
	}

	void validate_bucket(hash_ctx_t* ctx, hash_bucket_t* tmp) {
	#if VALIDATE
	uint64_t mask = ((uint64_t)1 << tmp->depth) - 1;
	uint64_t first = 0;
	bool has_first = false;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	uint8_t* buf = tmp->pieces[i].data;
	uint8_t* end = buf + tmp->pieces[i].bytes_used;
	while (buf < end)
	{
	uint64_t k, v;
	varint_decode(&buf, &k);
	varint_decode(&buf, &v);

	if (has_first == false)
	{
	first = k;
	has_first = true;
	}

	if ((k & mask) != (first & mask)) {
	write_dir_graphviz(ctx, "problem");
	printf("Problem\n");
	}
	}
	}
	#endif
	}

	bool hash_table_put(hash_ctx_t* ctx, uint64_t key, uint64_t value) {
	uint32_t bucket_idx = hash_table_bucket_number(ctx, key);
	hash_bucket_t* b = ctx->dir->buckets[bucket_idx];
	uint32_t piece_idx = key % NUMBER_OF_HASH_BUCKET_PIECES;

	uint8_t tmp_buffer[20]; // each varint can take up to 10 bytes
	uint8_t* buf_end = tmp_buffer;
	varint_encode(key, &buf_end);
	varint_encode(value, &buf_end);
	ptrdiff_t encoded_size = buf_end - tmp_buffer;

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
	uint8_t* buf = p->data;
	uint8_t* end = p->data + p->bytes_used;
	while (buf < end)
	{
	uint64_t k, v;
	uint8_t* cur_buf_start = buf;
	varint_decode(&buf, &k);
	varint_decode(&buf, &v);

	if (k == key) {
	if (v == value)
	return true; // nothing to do, value is already there
	ptrdiff_t diff = buf - cur_buf_start;
	if (diff == encoded_size) {
	// new value fit exactly where the old one went, let's put it there
	memcpy(cur_buf_start, tmp_buffer, encoded_size);
	validate_bucket(ctx, b);
	return true;
	}

	memmove(cur_buf_start, buf, end - buf);
	p->bytes_used -= (uint8_t)diff;
	b->number_of_entries--;
	ctx->dir->number_of_entries--;
	break; // found it, let's put this in
	}
	}

	if (p->bytes_used + encoded_size <= PIECE_BUCKET_BUFFER_SIZE) {
	memcpy(p->data + p->bytes_used, tmp_buffer, encoded_size);
	p->bytes_used += (uint8_t)encoded_size;
	b->number_of_entries++;
	ctx->dir->number_of_entries++;

	validate_bucket(ctx, b);
	return true; // was able to update the value in the same piece, done
	}

	// if we are looking at an overflow page, move to the next one and try to find it there
	if (!p->overflowed)
	break;
	}
	// couldn't find it in the proper place, let's put it in overflow
	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	hash_bucket_piece_t* p = &b->pieces[(piece_idx + i) % NUMBER_OF_HASH_BUCKET_PIECES];
	if (p->bytes_used + encoded_size <= PIECE_BUCKET_BUFFER_SIZE) {
	memcpy(p->data + p->bytes_used, tmp_buffer, encoded_size);
	p->bytes_used += (uint8_t)encoded_size;
	b->number_of_entries++;
	ctx->dir->number_of_entries++;

	validate_bucket(ctx, b);
	return true; // was able to update the value in the same piece, done
	}
	p->overflowed = true;
	}

	// there is no room here, need to expand

	if (ctx->dir->depth == b->depth) {
	hash_directory_t* new_dir;
	if (ctx->dir->number_of_buckets * 2 * sizeof(hash_bucket_t*) > hash_table_get_directory_capacity(ctx)) {

	// have to increase the actual allocated memory here
	new_dir = ctx->allocate_page(ctx->dir->directory_pages * 2);
	if (new_dir == NULL)
	return false;

	size_t dir_size = ctx->dir->directory_pages * HASH_BUCKET_PAGE_SIZE;
	memcpy(new_dir, ctx->dir, dir_size);
	new_dir->directory_pages *= 2;
	}
	else {
	new_dir = ctx->dir; // there is enough space to increase size without allocations
	}
	size_t buckets_size = ctx->dir->number_of_buckets * sizeof(hash_bucket_t*);
	memcpy((uint8_t)new_dir->buckets + buckets_size, (uint8_t)ctx->dir->buckets, buckets_size);
	new_dir->depth++;
	new_dir->number_of_buckets *= 2;
	if (new_dir != ctx->dir) {
	ctx->release_page(ctx->dir);
	ctx->dir = new_dir;
	}
	}
	//write_dir_graphviz(ctx, "BEFORE");
	hash_bucket_t* n = create_hash_bucket(ctx);
	if (!n)
	return false;

	hash_bucket_t* tmp = ctx->allocate_page(1);
	if (!tmp) {
	ctx->release_page(n);
	ctx->release_page(tmp);
	// no need to release the ctx->dir we allocated, was wired
	// properly to the table and will be freed with the whole table
	return false;
	}
	memcpy(tmp, b, HASH_BUCKET_PAGE_SIZE);
	memset(b, 0, sizeof(hash_bucket_t));
	memset(n, 0, sizeof(hash_bucket_t));
	n->depth = b->depth = tmp->depth + 1;

	uint32_t bit = 1 << tmp->depth;

	#if VALIDATE
	uint64_t mask = (bit >> 1) - 1;
	bool has_first = false;
	uint64_t first_key = 0;
	#endif

	for (size_t i = 0; i < NUMBER_OF_HASH_BUCKET_PIECES; i++)
	{
	uint8_t* buf = tmp->pieces[i].data;
	uint8_t* end = buf + tmp->pieces[i].bytes_used;
	while (buf < end)
	{
	uint64_t k, v;
	varint_decode(&buf, &k);
	varint_decode(&buf, &v);
	#if VALIDATE
	if (!has_first) {
	first_key = k;
	has_first = true;
	}

	if ((first_key & mask) != (k & mask)) {
	printf("mistmatch!: %I64u != %I64u\n", first_key, k);
	}
	#endif

	hash_bucket_t* cur = k & bit ? n : b;
	bool success = hash_table_piece_append_kv(cur, k, v);
	#if VALIDATE
	if (!success)
	printf("Can't split a page properly? Impossible");
	#endif
	}
	}
	ctx->release_page(tmp);

	for (size_t i = key & (bit - 1); i < ctx->dir->number_of_buckets; i += bit)
	{
	ctx->dir->buckets[i] = i & bit ? n : b;
	}

	// now can add the new value in...
	{
	hash_bucket_t* cur = key & bit ? n : b;
	if (hash_table_piece_append_kv(cur, key, value)) {
	ctx->dir->number_of_entries++;
	}
	else {
	if (!hash_table_put(ctx, key, value)) {
	#if VALIDATE
	printf("Unable to recursively add? That really should never happen.");
	#endif
	return false;
	}
	}
	}

	validate_bucket(ctx, n);
	validate_bucket(ctx, b);
	return true;
	}


	bool hash_table_init(hash_ctx_t* ctx) {
	ctx->dir = ctx->allocate_page(1);
	if (ctx->dir == NULL)
	return false;

	memset(ctx->dir, 0, HASH_BUCKET_PAGE_SIZE);
	ctx->dir->number_of_entries = 0;
	ctx->dir->number_of_buckets = 2;
	ctx->dir->directory_pages = 1;
	ctx->dir->depth = 1;

	ctx->dir->buckets[0] = create_hash_bucket(ctx);
	ctx->dir->buckets[1] = create_hash_bucket(ctx);

	if (!ctx->dir->buckets[0] \|\| !ctx->dir->buckets[1]) {
	ctx->release_page(ctx->dir->buckets[0]);
	ctx->release_page(ctx->dir->buckets[1]);
	ctx->release_page(ctx->dir);
	return false;
	}

	return true;
	}

	int allocations = 0;

	void* allocate_4k_page(uint32_t n) {
	allocations++;
	return _aligned_malloc(HASH_BUCKET_PAGE_SIZE * n, HASH_BUCKET_PAGE_SIZE);
	}

	void release_4k_page(void* p) {
	if (!p)
	return;
	allocations--;
	_aligned_free(p);
	}

	int main()
	{
	srand(19324);
	uint32_t const size = 1024 * 1024;

	uint64_t* keys = malloc(size * sizeof(uint64_t));
	uint64_t* values = malloc(size * sizeof(uint64_t));
	if (!keys \|\| !values)
	return -1;


	for (size_t i = 0; i < size; i++)
	{
	keys[i] = wygrand();
	values[i] = wygrand();
	}

	struct hash_ctx ctx = { allocate_4k_page, release_4k_page };
	if (!hash_table_init(&ctx)) {
	printf("Failed to init\n");
	return -1;
	}

	uint64_t first_key = keys[0];
	uint64_t first_value = values[0];


	for (size_t i = 0; i < size; i++)
	{
	uint64_t v;
	if (i == 184) {
	printf("a - ");
	}

	if (!hash_table_put(&ctx, keys[i], values[i])) {
	printf("Failed to put %i\n", i);
	write_dir_graphviz(&ctx, "PUT-ERR");
	return -1;
	}

	if (keys[i] == first_key)
	first_value = values[i];

	if (!hash_table_get(&ctx, first_key, &v) \|\| v != first_value) {
	printf("Failed to get 0 on %i\n", i);
	write_dir_graphviz(&ctx, "PUT-ERR");
	}
	}

	//write_dir_graphviz(&ctx, "FINAL");
	for (size_t x = 0; x < size; x++)
	{
	uint64_t val;

	if (!hash_table_get(&ctx, keys[x], &val)) {
	printf("Failed to get %i\n", x);
	write_dir_graphviz(&ctx, "GET-ERR");
	return -1;
	}
	if (val != values[x]) {
	printf("mismatch on %I32u - %I64u - %I64u, %I64u \n", x, keys[x], val, values[x]);
	}
	}
	print_hash_stats(&ctx);

	printf("%f MB\n", ((double)allocations * (double)HASH_BUCKET_PAGE_SIZE) / 1024.0 / 1024.0);

	return 0;
	}