Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@skeeto
Last active August 4, 2022 17:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skeeto/0ec21f9c6d9ea4e72ac6da3c4fe29ce0 to your computer and use it in GitHub Desktop.
Save skeeto/0ec21f9c6d9ea4e72ac6da3c4fe29ce0 to your computer and use it in GitHub Desktop.
Hash set demonstration
// cc -g -maes -o example example.c
// https://old.reddit.com/r/C_Programming/comments/w71gmw
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#if __AES__ || (_MSC_VER && _M_AMD64)
# include <smmintrin.h>
# include <wmmintrin.h>
#endif
#define MAX 50
#define HT_LEN 256
#define STR(s) (struct str){s}
struct str { unsigned char buf[16]; };
// Compute a 32-bit hash from a 16-byte input string.
static uint32_t
hash(struct str s)
{
#if __AES__ || (_MSC_VER && _M_AMD64)
// "Encrypt" the string with two rounds of AES
__m128i h, k;
k = _mm_loadu_si128((void *)s.buf);
h = _mm_aesenc_si128(k, k);
h = _mm_aesenc_si128(h, k);
return _mm_extract_epi32(h, 0);
#else
// Portable fallback hash
uint64_t a, b;
memcpy(&a, s.buf+0, 8);
memcpy(&b, s.buf+8, 8);
a *= 1111111111111111111;
a ^= a >> 33;
a += b;
a *= 1111111111111111111;
a ^= a >> 33;
return (uint32_t)a;
#endif
}
// Returns the next hash table index for this hash. Use negative i for
// the first call.
static int
ht_next(uint32_t hash, int i)
{
int mask = HT_LEN - 1;
int step = (hash>>16 | 1) & mask;
return i<0 ? (int)(hash&mask) : (i + step)&mask;
}
#define COMPUTE_INIT {{{{0}}}, 0, {0}, 0}
struct compute {
struct str stack[MAX];
int stack_len;
int ht[HT_LEN];
int ht_len;
};
// Clear the hash table and rebuild it from the stack.
static void
compute_rehash(struct compute *c)
{
// TODO: Consider rekeying the hash table by using a counter as a
// hash key and incrementing it upon rehash. This would help break
// out of pathological, high collision input sets, whether malicious
// or not.
memset(c->ht, 0, sizeof(c->ht));
for (int si = 0; si < c->stack_len; si++) {
uint32_t h = hash(c->stack[si]);
for (int i = -1;;) {
i = ht_next(h, i);
if (!c->ht[i]) {
c->ht[i] = si + 1;
break;
}
}
}
c->ht_len = c->stack_len;
}
// Try to push a new string onto the stack, returning 1 on success.
static int
compute_push(struct compute *c, struct str s)
{
assert(c->stack_len < MAX);
assert(s.buf[15] == 0);
uint32_t h = hash(s);
int tombstone = -1;
for (int i = -1;;) {
i = ht_next(h, i);
int si = c->ht[i] - 1;
// Empty slot?
if (si == -1) {
c->stack[c->stack_len++] = s;
if (tombstone < 0) {
if (c->ht_len > HT_LEN/2) {
compute_rehash(c);
} else {
c->ht[i] = c->stack_len;
c->ht_len++;
}
} else {
c->ht[tombstone] = c->stack_len;
}
return 1;
}
// Already present?
if (si < c->stack_len && !memcmp(s.buf, c->stack[si].buf, 16)) {
return 0;
}
// First tombstone found?
if (tombstone < 0 && si >= c->stack_len) {
tombstone = i;
}
}
}
// Print the current compute state (for debugging).
static void
compute_print(struct compute *c)
{
puts("stack:");
for (int i = 0; i < c->stack_len; i++) {
printf("\t%d\t%s\n", i, c->stack[i].buf);
}
puts("hashtable:");
for (int i = 0; i < HT_LEN; i++) {
if (c->ht[i]) {
char *note = c->ht[i]<=c->stack_len ? "" : " (tombstone)";
printf("\t%d\t%s%s\n", i, c->stack[c->ht[i]-1].buf, note);
}
}
putchar('\n');
}
// Remove the most recently-added item from the compute state.
static void
compute_backtrack(struct compute *c)
{
assert(c->stack_len);
c->stack_len--;
}
int main(void)
{
struct compute c = COMPUTE_INIT;
struct str test[] = {
{"hello"},
{"world"},
{"hello"},
{"world"},
{"example"},
};
for (int i = 0; i < (int)(sizeof(test)/sizeof(*test)); i++) {
printf("push(%s)\t= %d\n", test[i].buf, compute_push(&c, test[i]));
}
compute_print(&c);
puts("backtrack()");
compute_backtrack(&c);
compute_print(&c);
puts("rehash()");
compute_rehash(&c);
compute_print(&c);
puts("backtrack()");
compute_backtrack(&c);
compute_print(&c);
// "copped" collides with "world" with HT_LEN==256 and little endian
struct str collision = STR("copped");
printf("push(%s)\t= %d\n", collision.buf, compute_push(&c, collision));
compute_print(&c);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment