Last active
March 17, 2018 23:48
-
-
Save saolsen/ae86098700950086160dcb4c0c26e0ca to your computer and use it in GitHub Desktop.
steve_lib.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Collections and helpers for c programming. | |
// Version: 0.1.3 | |
// Shoutouts to @nothings and @pervognsen who I learned / copied most of this from | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdarg.h> | |
#include <string.h> | |
#include <assert.h> | |
// @NOTE: Taken from here https://github.com/lemire/fastrange | |
#if defined(_MSC_VER) && defined (_WIN64) | |
#include <intrin.h>// should be part of all recent Visual Studio | |
#pragma intrinsic(_umul128) | |
#endif // defined(_MSC_VER) && defined (_WIN64) | |
#define range(i,n) _range((uint64_t)i, n) | |
static inline uint64_t _range(uint64_t i, uint64_t n) { | |
#ifdef __SIZEOF_INT128__ // @Q: Is this right for osx/linux? | |
return (uint64_t)(((__uint128_t)i * (__uint128_t)n) >> 64); | |
#elif defined(_MSC_VER) && defined(_WIN64) | |
// supported in Visual Studio 2005 and better | |
uint64_t highProduct; | |
_umul128(i, n, &highProduct); // ignore output | |
return highProduct; | |
unsigned __int64 _umul128( | |
unsigned __int64 Multiplier, | |
unsigned __int64 Multiplicand, | |
unsigned __int64 *HighProduct | |
); | |
#else | |
assert(0 && "USING SLOW MODULO"); | |
return i % n; // fallback | |
#endif // __SIZEOF_INT128__ | |
} | |
#define MAX(x,y) ((x) >= (y) ? (x) : (y)) | |
typedef struct ArrayHeader { | |
uint64_t length; | |
uint64_t capacity; | |
uint8_t arr[]; | |
} ArrayHeader; | |
#define _ahead(a) ((ArrayHeader*)((uint8_t*)a - offsetof(struct ArrayHeader, arr))) | |
#define _aneedgrow(a,n) (alen(a) + n > acap(a)) | |
#define _agrow(a,n) ((a) = _agrowf((a), alen(a) + (n), sizeof(*(a)))) | |
#define _amaybegrow(a,n) (_aneedgrow(a,n) ? _agrow(a,n) : 0) | |
#define apush(a, ...) (_amaybegrow(a,1), (a)[_ahead(a)->length++] = (__VA_ARGS__)) | |
#define aadd(a, n) (_amaybegrow(a,n), _ahead(a)->length+=(n), &(a)[alen(a)-(n)]) | |
#define alen(a) ((a) ? _ahead(a)->length : 0) | |
#define acap(a) ((a) ? _ahead(a)->capacity : 0) | |
#define aend(a) (a)+alen(a) | |
#define afree(a) ((a) ? free((ArrayHeader*)a-1), (a) = NULL) : 0) | |
static void* _agrowf_raw(void *a, size_t new_len, size_t itemsize) | |
{ | |
size_t new_cap = MAX(1 + 2*acap(a), new_len); | |
assert(new_len <= new_cap); | |
size_t new_size = offsetof(struct ArrayHeader, arr) + new_cap * itemsize; | |
ArrayHeader *new_header; | |
if (a) { | |
new_header = realloc(_ahead(a), new_size); | |
} else { | |
new_header = malloc(new_size); | |
new_header->length = 0; | |
} | |
new_header->capacity = new_cap; | |
return new_header->arr; | |
} | |
#ifdef __cplusplus | |
template<class T> | |
static T * _agrowf(T * a, uint64_t num, uint64_t itemsize) | |
{ | |
return (T*)_agrowf_raw((void *)a, num, itemsize); | |
} | |
#else | |
#define _agrowf _agrowf_raw | |
#endif | |
#define aprintf(a, ...) ((a) = a__printf((a), __VA_ARGS__)) | |
char *a__printf(char *a, const char *fmt, ...) { | |
// Assert that our buf is null terminated if there's anything in it. | |
assert(!a || alen(a) != 0 && a[alen(a)-1] == 0); | |
va_list args; | |
va_start(args, fmt); | |
size_t n = vsnprintf(NULL, 0, fmt, args); | |
va_end(args); | |
char *dest; | |
if (alen(a) == 0) { | |
dest = aadd(a, n+1); | |
} else { | |
dest = aadd(a, n) - 1; | |
} | |
va_start(args, fmt); | |
vsnprintf(dest, n+1, fmt, args); | |
va_end(args); | |
return a; | |
} | |
#define NOT_FOUND (void*)0xFFFFFFFFFFFFFFFF | |
typedef struct { | |
void* key; | |
void* value; | |
} HashPair; | |
typedef struct { | |
HashPair *table; | |
uint64_t count; | |
uint64_t capacity; | |
} HashTable; | |
int hinit(HashTable *h, uint64_t n) | |
{ | |
if (n < 16) n = 16; // @TODO: Figure out ideal size multiples. | |
h->count = 0; | |
h->capacity = n; | |
// @TODO: Grow and shrink thresholds | |
h->table = (HashPair*)malloc(sizeof(*h->table) * n); | |
if (!h->table) return 0; // @TODO: return error, out of memory. | |
for (int i = 0; i < n; i++) { | |
h->table[i].key = NOT_FOUND; | |
} | |
return 1; | |
} | |
void hfree(HashTable *h) | |
{ | |
h->count = 0; | |
free(h->table); | |
h->table = NULL; | |
h->capacity = 0; | |
} | |
void hclear(HashTable *h) | |
{ | |
h->count = 0; | |
for (int i = 0; i < h->capacity; h++) { | |
h->table[i].key = NOT_FOUND; | |
} | |
} | |
// @TODO: Deletion | |
// It interacts with resizing and chaining so gonna take a sec to figure out. | |
#define hput(h,k,v) _hput(h, (void*)k, (void*)v) | |
void _hput(HashTable *h, void *key, void *value) | |
{ | |
assert(key < NOT_FOUND); | |
assert(value < NOT_FOUND); | |
if (h->count == h->capacity) { | |
if (h->capacity < 8) h->capacity = 8; | |
uint64_t n = 2 * h->capacity; | |
uint64_t count = 0; | |
HashPair *new_table = (HashPair*)malloc(sizeof(*h->table) * n); | |
for (int i=0; i<n; i++) { new_table[i].key = NOT_FOUND; } | |
if (h->table) { | |
for (int old_i=0; old_i<h->capacity; old_i++) { | |
HashPair *kvp = h->table + old_i; | |
if (kvp->key != NOT_FOUND) { | |
uint64_t new_hash_index = range(kvp->key, n); | |
uint64_t i = new_hash_index; | |
do { | |
if (i == n) i = 0; | |
HashPair *new_kvp = new_table + i; | |
if (new_kvp->key == NOT_FOUND) { | |
new_kvp->key = kvp->key; | |
new_kvp->value = kvp->value; | |
count++; | |
break; | |
} | |
} while (++i != new_hash_index); | |
} | |
} | |
free(h->table); | |
} | |
h->capacity = n; | |
h->count = count; | |
h->table = new_table; | |
} | |
uint64_t hash_index = range(key, h->capacity); | |
assert(hash_index < h->capacity); | |
uint64_t i = hash_index; | |
// @TODO: Investigate probing techniques, this is just linear, is there something better? | |
// robin hood hashing and also a max probe distance would help | |
do { | |
if (i == h->capacity) i = 0; | |
HashPair *kvp = h->table + i; | |
if (kvp->key == NOT_FOUND || kvp->key == key) { | |
kvp->value = value; | |
if (kvp->key == NOT_FOUND) { | |
kvp->key = key; | |
h->count++; | |
} | |
return; | |
} | |
} while (++i != hash_index); | |
assert(0 && "NO FREE SLOTS"); | |
} | |
#define hget(h,k) _hget(h,(void*)k) | |
#define hgetu(h,k) (uint64_t)_hget(h, (void*)k) | |
#define hgeti(h,k) (int64_t)_hget(h, (void*)k) | |
void* _hget(HashTable *h, void *key) | |
{ | |
assert(key < NOT_FOUND); | |
if (!h->table) { return NOT_FOUND; } | |
uint64_t hash_index = range(key, h->capacity); | |
assert(hash_index < h->capacity); | |
uint64_t i = hash_index; | |
do { | |
if (i == h->capacity) i = 0; | |
HashPair *kvp = h->table + i; | |
if (kvp->key == NOT_FOUND) { | |
return NOT_FOUND; | |
} | |
if (kvp->key == key) { | |
return kvp->value; | |
} | |
} while (++i != hash_index); | |
return NOT_FOUND; | |
} | |
typedef struct { | |
char *base; | |
uint64_t used; | |
uint64_t capacity; | |
} Arena; | |
#define MIN_ALLOC_SIZE 4096 | |
char *push(Arena *a, uint64_t size) | |
{ | |
if (a->used + size > a->capacity) { | |
// @TODO: keep track of previous pages if you want to be able to reset or free. | |
uint64_t n = size < MIN_ALLOC_SIZE ? MIN_ALLOC_SIZE : size; | |
a->base = (char*)malloc(n); | |
a->used = 0; | |
a->capacity = n; | |
} | |
char *result = a->base + a->used; | |
a->used += size; | |
return result; | |
} | |
typedef struct InternedString { | |
struct InternedString *next; | |
char str[]; | |
} InternedString; | |
static Arena string_arena = {0}; | |
static HashTable string_table = {0}; | |
// @TODO: Investigate string hash functions. | |
static uint64_t hash(char *str, size_t length) | |
{ | |
uint64_t h = 5381; | |
for (int i=0; i<length; i++) { | |
h = ((h << 5) + h) + str[i]; | |
} | |
return h; | |
} | |
const char *intern_range(char* start, const char *end) | |
{ | |
uint64_t key = hash(start, end - start); | |
InternedString *head = (InternedString*)hget(&string_table, key); | |
if (head != NOT_FOUND) { | |
for (InternedString *node = head; node; node->next) { | |
const char *s1 = node->str; | |
const char *s2 = start; | |
while (*s1 && s2 != end && *s1 == *s2) { | |
s1++; | |
s2++; | |
} | |
if (*s1 == '\0' && s2 == end) { | |
return node->str; | |
} | |
} | |
} | |
// Intern new string. | |
size_t size = end - start + 1; | |
InternedString *new_head = (InternedString*)push(&string_arena, sizeof(InternedString*) + size); | |
new_head->next = head; | |
memcpy(new_head->str, start, size); | |
new_head->str[size-1] = '\0'; | |
hput(&string_table, key, new_head); | |
return new_head->str; | |
} | |
const char *intern_size(char *start, uint64_t size) { | |
return intern_range(start, start+size); | |
} | |
// Intern a null terminated string. | |
// @TODO: Could be more efficiant. | |
const char *intern_str(char *s) | |
{ | |
return intern_size(s, strlen(s)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment