Skip to content

Instantly share code, notes, and snippets.

@submachine
Created July 6, 2012 23:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save submachine/3063407 to your computer and use it in GitHub Desktop.
Save submachine/3063407 to your computer and use it in GitHub Desktop.
A toy x86_64 pointer-hash benchmark to go with the StackOverflow answer at: http://stackoverflow.com/a/11364619/274261
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#define HASH_BITS 10
#define BUCKETS (1 << HASH_BITS)
/* A half-avalanche 4 byte integer hash function, as described at:
http://burtleburtle.net/bob/hash/integer.html */
uint32_t half_avalanche (uint32_t a);
/* A hash function written to play nice with x86_64 virtual addresses,
as described at: http://stackoverflow.com/a/11364619/274261 */
uint32_t hash (void * p);
void **buckets;
bool *collisions;
/* A toy benchmark for `hash'. 'toy' is the key word here. Tests for hash
collisions between pointers to memory allocated on the heap. */
int main (void)
{
int d, i, values, n_collision;
void * p;
uint32_t index;
int n_collision_buckets;
srand (33);
buckets = malloc (BUCKETS * sizeof (void *));
collisions = malloc (BUCKETS * sizeof (bool));
/* We will actually leak memory during the execution. But freeing any
can cause malloc to return the same pointers again, and give us false
collisions. So, we just let the leak be. */
for (d = 2; d >= 0; d--)
{
n_collision = 0;
values = BUCKETS >> d;
/* Empty the buckets. */
for (i = 0; i < BUCKETS; i++)
{
buckets[i] = NULL;
collisions[i] = false;
}
for (i = 0; i < values; i++)
{
/* Allocate either a small even sized block, or a big one. */
if (rand () % 2)
p = malloc (2 << (rand () % 3));
else
p = malloc (512 + (rand () % 1024));
index = hash (p);
if (buckets[index] != NULL)
{
n_collision++;
collisions[index] = true;
}
buckets[index] = p;
}
n_collision_buckets = 0;
for (i = 0; i < BUCKETS; i++)
if (collisions[i])
n_collision_buckets ++;
printf ("%d buckets, %d values, %d collissions across %d buckets\n",
BUCKETS, values, n_collision, n_collision_buckets);
}
return 0;
}
uint32_t hash (void * p)
{
uint64_t a, msbs;
a = (uint64_t) p;
/* Drop two LSBs. */
a >>= 2;
/* Get rid of the MSBs. Keep 46 bits. */
a &= 0x3fffffffffff;
/* Get the 14 MSBs and fold them in to get a 32 bit integer.
The MSBs are mostly 0s anyway, so we don't lose much entropy */
msbs = (a >> 32) << 18;
a ^= msbs;
return half_avalanche ((uint32_t) a) >> (32 - HASH_BITS);
}
uint32_t half_avalanche (uint32_t a)
{
a = (a+0x479ab41d) + (a<<8);
a = (a^0xe4aa10ce) ^ (a>>5);
a = (a+0x9942f0a6) - (a<<14);
a = (a^0x5aedd67d) ^ (a>>3);
a = (a+0x17bea992) + (a<<7);
return a;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment