Skip to content

Instantly share code, notes, and snippets.

@simonhf
Last active October 26, 2016 00:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonhf/48f48dd00c64c351dec42611047009d8 to your computer and use it in GitHub Desktop.
Save simonhf/48f48dd00c64c351dec42611047009d8 to your computer and use it in GitHub Desktop.
1 cache line fetch per hash table read simulation experimentation lab
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <unistd.h>
#include <murmurhash3.h>
// gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
#define CACHE_LINE_SIZE (1024)
#define SLOTS_PER_BUCKET (CACHE_LINE_SIZE / 2 / sizeof(uint32_t))
typedef struct BUCKET {
uint32_t key[SLOTS_PER_BUCKET];
uint32_t val[SLOTS_PER_BUCKET];
} __attribute__((packed)) BUCKET;
double
get_time_in_seconds(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
}
void
main(void)
{
setlocale(LC_NUMERIC, "");
printf("- starting\n");
size_t memory_to_allocate = 1 << 29;
size_t size_of_cache_line = sizeof(BUCKET);
size_t number_of_buckets = memory_to_allocate / size_of_cache_line;
printf("- mmap() %'zu bytes or %zu MB containing %'zu buckets of size %zu with %zu slots per bucket or %'zu total slots\n", memory_to_allocate, memory_to_allocate / 1024 / 1024, number_of_buckets, size_of_cache_line, SLOTS_PER_BUCKET, SLOTS_PER_BUCKET * number_of_buckets);
BUCKET * buckets = mmap (NULL, memory_to_allocate, PROT_READ | PROT_WRITE,MAP_SHARED | MAP_ANONYMOUS, -1, 0);
uint64_t key;
uint64_t hash[2];
uint32_t i = 0;
double t1 = get_time_in_seconds();
while(1) {
key = i;
REHASH1:;
MurmurHash3_x64_128(&key, sizeof(key), 123456, &hash[0]);
uint32_t bucket = hash[0] % number_of_buckets;
if(0 == bucket) { printf("- hit magic bucket 0; re-hashing key %'zu\n", key); key += 1 + key; goto REHASH1; }
//debug printf("- i=%zu 0x%16zx 0x%16zx bucket=%'u\n", i, hash[0], hash[1], bucket);
for(uint32_t j = 0; j < (SLOTS_PER_BUCKET - 1); j++) {
if(0 == buckets[bucket].key[j]) {
//debug printf("- inserted at j=%u\n", j);
buckets[bucket].key[j] = key;
buckets[bucket].val[j] = key;
goto INSERTED;
}
}
goto EARLY_OUT_PUT;
INSERTED:;
i ++;
}
EARLY_OUT_PUT:;
uint32_t i_max = i;
double t2 = get_time_in_seconds();
printf("- put %'u keys (before collision) in %f seconds or %'.0f inserts per second\n", i_max, t2 - t1, i_max / (t2 - t1));
uint32_t process_number;
uint32_t process_number_max = 1;
for(process_number = 1; process_number <= process_number_max; process_number ++) {
pid_t pid = fork();
if(0 == pid) {
goto PUT_KID_TO_WORK;
}
}
printf("- parent ending after fork()ing %u kids\n", process_number_max);
exit(0);
PUT_KID_TO_WORK:;
#define PRECALC_LEN (8)
for(uint32_t loop = 0; loop < 4; loop ++) {
uint32_t prefetch_active = 0;
if(loop >= 1) { prefetch_active = 1; }
uint32_t precalc_len_multiple = 1;
if(2 == loop) { precalc_len_multiple = 2; }
if(3 == loop) { precalc_len_multiple = 3; }
int32_t precalc_put_pos = 0;
int32_t precalc_get_pos = 0 - (PRECALC_LEN * (precalc_len_multiple - 1));
uint32_t precalc_bucket[PRECALC_LEN * precalc_len_multiple];
uint32_t precalc_key[PRECALC_LEN * precalc_len_multiple];
uint32_t precalc_val[PRECALC_LEN * precalc_len_multiple];
uint64_t count = 0;
//debug printf("- precalc_put_pos=%d, precalc_get_pos=%d\n", precalc_put_pos, precalc_get_pos);
i = process_number * 100000;
t1 = get_time_in_seconds();
while(1) {
for(uint32_t k = 0; k < PRECALC_LEN; k ++) {
key = i;
REHASH2:;
precalc_key[k + precalc_put_pos] = key;
MurmurHash3_x64_128(&key, sizeof(key), 123456, &hash[0]);
precalc_bucket[k + precalc_put_pos] = hash[0] % number_of_buckets;
if(0 == precalc_bucket[k + precalc_put_pos]) { /* printf("- hit magic bucket 0; re-hashing key %'zu\n", key); */ key += 1 + key; goto REHASH2; }
#define PREFETCH_LOCALITY (0)
if(prefetch_active) { __builtin_prefetch(&buckets[precalc_bucket[k + precalc_put_pos]].key[0], 0, PREFETCH_LOCALITY); }
count ++;
i ++;
if(i >= i_max) { i = 0; }
if(0 == (i & 0xffff)) {
t2 = get_time_in_seconds();
if((t2 - t1) >= 5) {
goto EARLY_OUT_GET;
}
}
}
if(precalc_get_pos >= 0) {
for(uint32_t k = 0; k < PRECALC_LEN; k ++) {
uint32_t bucket = precalc_bucket[k + precalc_get_pos];
uint32_t key = precalc_key[k + precalc_get_pos];
for(uint32_t j = 0; j < (SLOTS_PER_BUCKET - 1); j++) {
if(key == buckets[bucket].key[j]) {
precalc_val[k + precalc_get_pos] = buckets[bucket].key[j];
goto GOT;
}
}
printf("- ERROR: INTERNAL: key not found for key=%'u in bucket=%'u !\n", key, bucket);
exit(1);
GOT:;
}
}
precalc_put_pos += PRECALC_LEN;
precalc_get_pos += PRECALC_LEN;
precalc_put_pos = precalc_put_pos >= (PRECALC_LEN * precalc_len_multiple) ? 0 : precalc_put_pos;
precalc_get_pos = precalc_get_pos >= (PRECALC_LEN * precalc_len_multiple) ? 0 : precalc_get_pos;
}
EARLY_OUT_GET:;
printf("- process_number=%u loop=%u prefetch_active=%u precalc_len_multiple=%u got %'zu keys in %f seconds or %'.0f gets per second\n", process_number, loop, prefetch_active, precalc_len_multiple, count, t2 - t1, count / (t2 - t1));
}
printf("- process_number=%u ending\n", process_number);
}
//
// experiments with a 64 byte bucket and changing the number of parallel processes
//
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.746633 seconds or 6,567,010 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 76,265,253 keys in 5.002618 seconds or 15,245,069 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 102,747,068 keys in 5.000722 seconds or 20,546,446 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 121,704,296 keys in 5.002531 seconds or 24,328,544 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 122,031,976 keys in 5.000540 seconds or 24,403,760 gets per second
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.784651 seconds or 6,248,827 inserts per second
// - parent ending after fork()ing 2 kids
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 65,572,527 keys in 5.001105 seconds or 13,111,608 gets per second
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 65,672,527 keys in 5.001063 seconds or 13,131,714 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 87,544,411 keys in 5.002060 seconds or 17,501,671 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 87,251,195 keys in 5.002263 seconds or 17,442,344 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 94,698,193 keys in 5.002471 seconds or 18,930,283 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 97,994,012 keys in 5.004300 seconds or 19,581,961 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 110,028,530 keys in 5.000954 seconds or 22,001,507 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 110,718,354 keys in 5.002635 seconds or 22,132,007 gets per second
// - process_number=2 ending
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.744962 seconds or 6,581,739 inserts per second
// - parent ending after fork()ing 3 kids
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,256,057 keys in 5.002871 seconds or 11,244,755 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 52,698,094 keys in 5.003711 seconds or 10,531,802 gets per second
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 58,422,137 keys in 5.007053 seconds or 11,667,968 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 75,803,109 keys in 5.000443 seconds or 15,159,279 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 79,364,464 keys in 5.001411 seconds or 15,868,415 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 76,199,717 keys in 5.001988 seconds or 15,233,887 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 92,675,238 keys in 5.004450 seconds or 18,518,566 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 79,954,288 keys in 5.005317 seconds or 15,973,871 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 74,561,317 keys in 5.002349 seconds or 14,905,261 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 85,216,187 keys in 5.002121 seconds or 17,036,011 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 86,823,515 keys in 5.000560 seconds or 17,362,758 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 93,006,310 keys in 5.000277 seconds or 18,600,231 gets per second
// - process_number=3 ending
// - process_number=2 ending
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.772905 seconds or 6,343,791 inserts per second
// - parent ending after fork()ing 4 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,683,982 keys in 5.002305 seconds or 10,332,033 gets per second
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 55,697,305 keys in 5.004646 seconds or 11,129,120 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,028,377 keys in 5.001612 seconds or 11,202,064 gets per second
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,221,838 keys in 5.003998 seconds or 10,236,183 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 67,310,927 keys in 5.001811 seconds or 13,457,311 gets per second
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 66,683,247 keys in 5.004323 seconds or 13,325,128 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 68,193,967 keys in 5.003913 seconds or 13,628,128 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 65,931,279 keys in 5.002827 seconds or 13,178,805 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 63,587,428 keys in 5.000220 seconds or 12,716,926 gets per second
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 72,569,434 keys in 5.005618 seconds or 14,497,597 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 78,196,869 keys in 5.005375 seconds or 15,622,580 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 76,261,861 keys in 5.003949 seconds or 15,240,336 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 75,740,965 keys in 5.000678 seconds or 15,146,140 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 70,279,066 keys in 5.003276 seconds or 14,046,610 gets per second
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 71,717,466 keys in 5.009659 seconds or 14,315,838 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 73,247,205 keys in 5.000062 seconds or 14,649,259 gets per second
// - process_number=1 ending
// - process_number=2 ending
// - process_number=4 ending
// - process_number=3 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.737446 seconds or 6,648,823 inserts per second
// - parent ending after fork()ing 5 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 47,436,195 keys in 5.003296 seconds or 9,480,989 gets per second
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 48,088,163 keys in 5.000460 seconds or 9,616,748 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,304,579 keys in 5.006865 seconds or 9,048,492 gets per second
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 46,611,907 keys in 5.001497 seconds or 9,319,591 gets per second
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 51,939,342 keys in 5.002657 seconds or 10,382,351 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 54,162,297 keys in 5.001440 seconds or 10,829,340 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 66,848,783 keys in 5.001389 seconds or 13,366,043 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 61,259,204 keys in 5.000600 seconds or 12,250,371 gets per second
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 61,321,348 keys in 5.006056 seconds or 12,249,433 gets per second
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 64,486,095 keys in 5.001064 seconds or 12,894,475 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 64,751,631 keys in 5.002955 seconds or 12,942,677 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 67,310,927 keys in 5.006616 seconds or 13,444,396 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 75,444,357 keys in 5.004708 seconds or 15,074,677 gets per second
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 59,486,340 keys in 5.003384 seconds or 11,889,221 gets per second
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 74,882,213 keys in 5.000509 seconds or 14,974,918 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 72,407,290 keys in 5.002365 seconds or 14,474,611 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 65,244,847 keys in 5.002451 seconds or 13,042,575 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 70,903,354 keys in 5.004322 seconds or 14,168,424 gets per second
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 67,731,823 keys in 5.003001 seconds or 13,538,239 gets per second
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 67,304,143 keys in 5.002435 seconds or 13,454,276 gets per second
// - process_number=2 ending
// - process_number=1 ending
// - process_number=4 ending
// - process_number=3 ending
// - process_number=5 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.725215 seconds or 6,760,957 inserts per second
// - parent ending after fork()ing 6 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 41,615,544 keys in 5.004154 seconds or 8,316,200 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,107,971 keys in 5.006711 seconds or 9,009,502 gets per second
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 44,218,147 keys in 5.003569 seconds or 8,837,321 gets per second
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 43,659,395 keys in 5.004504 seconds or 8,724,020 gets per second
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 48,512,451 keys in 5.006961 seconds or 9,689,001 gets per second
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 43,828,323 keys in 5.000483 seconds or 8,764,818 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 60,376,164 keys in 5.002672 seconds or 12,068,783 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,698,094 keys in 5.006964 seconds or 10,524,960 gets per second
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 56,645,881 keys in 5.004448 seconds or 11,319,107 gets per second
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 57,925,529 keys in 5.001449 seconds or 11,581,749 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 55,207,481 keys in 5.000500 seconds or 11,040,392 gets per second
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 54,317,657 keys in 5.006172 seconds or 10,850,138 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 65,606,991 keys in 5.001629 seconds or 13,117,124 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 56,945,881 keys in 5.004772 seconds or 11,378,316 gets per second
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 61,614,564 keys in 5.000714 seconds or 12,321,153 gets per second
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 55,107,481 keys in 5.001246 seconds or 11,018,750 gets per second
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 63,403,055 keys in 5.001584 seconds or 12,676,596 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 68,093,967 keys in 5.003964 seconds or 13,608,005 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 59,196,516 keys in 5.002539 seconds or 11,833,294 gets per second
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 53,415,598 keys in 5.002627 seconds or 10,677,509 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 66,490,031 keys in 5.007029 seconds or 13,279,338 gets per second
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 53,858,905 keys in 5.001302 seconds or 10,768,977 gets per second
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 57,891,065 keys in 5.005824 seconds or 11,564,742 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 69,196,026 keys in 5.001882 seconds or 13,833,998 gets per second
// - process_number=1 ending
// - process_number=4 ending
// - process_number=2 ending
// - process_number=6 ending
// - process_number=5 ending
// - process_number=3 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.778333 seconds or 6,299,550 inserts per second
// - parent ending after fork()ing 7 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,236,685 keys in 5.003076 seconds or 7,442,758 gets per second
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,532,259 keys in 5.005522 seconds or 9,096,406 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 39,418,392 keys in 5.009593 seconds or 7,868,582 gets per second
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 38,956,248 keys in 5.000563 seconds or 7,790,372 gets per second
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 40,263,576 keys in 5.002979 seconds or 8,047,920 gets per second
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 39,249,464 keys in 5.003498 seconds or 7,844,405 gets per second
// - process_number=7 loop=0 prefetch_active=0 precalc_len_multiple=1 got 45,001,187 keys in 5.007646 seconds or 8,986,495 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 49,128,078 keys in 5.002063 seconds or 9,821,564 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 46,711,907 keys in 5.004042 seconds or 9,334,835 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,401,731 keys in 5.004479 seconds or 9,471,861 gets per second
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 49,548,974 keys in 5.002409 seconds or 9,905,023 gets per second
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,232,558 keys in 5.002241 seconds or 10,441,832 gets per second
// - process_number=7 loop=1 prefetch_active=1 precalc_len_multiple=1 got 52,918,990 keys in 5.003712 seconds or 10,575,947 gets per second
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 50,038,798 keys in 5.007249 seconds or 9,993,271 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 53,387,918 keys in 5.007230 seconds or 10,662,166 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,421,049 keys in 5.002982 seconds or 10,877,722 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 56,487,129 keys in 5.004470 seconds or 11,287,335 gets per second
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 53,400,153 keys in 5.002144 seconds or 10,675,453 gets per second
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 51,777,198 keys in 5.005023 seconds or 10,345,047 gets per second
// - process_number=7 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,807,481 keys in 5.002081 seconds or 10,956,936 gets per second
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 54,024,441 keys in 5.007304 seconds or 10,789,128 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 55,866,233 keys in 5.002276 seconds or 11,168,162 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,566,478 keys in 5.000430 seconds or 10,112,426 gets per second
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 52,363,630 keys in 5.001225 seconds or 10,470,161 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 51,256,302 keys in 5.006153 seconds or 10,238,661 gets per second
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 56,090,521 keys in 5.005819 seconds or 11,205,064 gets per second
// - process_number=7 loop=3 prefetch_active=1 precalc_len_multiple=3 got 57,363,385 keys in 5.003658 seconds or 11,464,290 gets per second
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 52,922,382 keys in 5.003028 seconds or 10,578,070 gets per second
// - process_number=1 ending
// - process_number=3 ending
// - process_number=6 ending
// - process_number=2 ending
// - process_number=4 ending
// - process_number=7 ending
// - process_number=5 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64
// - put 4,903,147 keys (before collision) in 0.761831 seconds or 6,436,003 inserts per second
// - parent ending after fork()ing 8 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,712,397 keys in 5.001366 seconds or 7,340,474 gets per second
// - process_number=2 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,940,077 keys in 5.001865 seconds or 7,385,261 gets per second
// - process_number=3 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,102,221 keys in 5.003418 seconds or 7,415,375 gets per second
// - process_number=8 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,864,365 keys in 5.000885 seconds or 7,371,568 gets per second
// - process_number=6 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,867,757 keys in 5.001844 seconds or 7,370,833 gets per second
// - process_number=7 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,964,365 keys in 5.004100 seconds or 7,386,816 gets per second
// - process_number=4 loop=0 prefetch_active=0 precalc_len_multiple=1 got 37,198,829 keys in 5.005153 seconds or 7,432,106 gets per second
// - process_number=5 loop=0 prefetch_active=0 precalc_len_multiple=1 got 36,771,149 keys in 5.007912 seconds or 7,342,611 gets per second
// - process_number=2 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,697,795 keys in 5.000444 seconds or 9,138,748 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,601,187 keys in 5.006289 seconds or 9,108,780 gets per second
// - process_number=3 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,432,803 keys in 5.005106 seconds or 9,476,883 gets per second
// - process_number=6 loop=1 prefetch_active=1 precalc_len_multiple=1 got 42,753,944 keys in 5.001581 seconds or 8,548,086 gets per second
// - process_number=8 loop=1 prefetch_active=1 precalc_len_multiple=1 got 44,507,971 keys in 5.003052 seconds or 8,896,164 gets per second
// - process_number=7 loop=1 prefetch_active=1 precalc_len_multiple=1 got 44,476,899 keys in 5.001811 seconds or 8,892,159 gets per second
// - process_number=4 loop=1 prefetch_active=1 precalc_len_multiple=1 got 47,594,947 keys in 5.008350 seconds or 9,503,119 gets per second
// - process_number=5 loop=1 prefetch_active=1 precalc_len_multiple=1 got 45,004,579 keys in 5.007661 seconds or 8,987,146 gets per second
// - process_number=2 loop=2 prefetch_active=1 precalc_len_multiple=2 got 47,532,803 keys in 5.001259 seconds or 9,504,168 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 47,501,731 keys in 5.004918 seconds or 9,491,011 gets per second
// - process_number=3 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,122,083 keys in 5.002951 seconds or 9,218,976 gets per second
// - process_number=8 loop=2 prefetch_active=1 precalc_len_multiple=2 got 48,690,222 keys in 5.000941 seconds or 9,736,212 gets per second
// - process_number=6 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,346,371 keys in 5.003253 seconds or 9,263,248 gets per second
// - process_number=7 loop=2 prefetch_active=1 precalc_len_multiple=2 got 48,724,686 keys in 5.007236 seconds or 9,730,855 gets per second
// - process_number=4 loop=2 prefetch_active=1 precalc_len_multiple=2 got 46,874,051 keys in 5.004600 seconds or 9,366,193 gets per second
// - process_number=5 loop=2 prefetch_active=1 precalc_len_multiple=2 got 43,574,840 keys in 5.004941 seconds or 8,706,364 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 42,926,264 keys in 5.002457 seconds or 8,581,036 gets per second
// - process_number=2 loop=3 prefetch_active=1 precalc_len_multiple=3 got 48,319,235 keys in 5.007317 seconds or 9,649,725 gets per second
// - process_number=8 loop=3 prefetch_active=1 precalc_len_multiple=3 got 45,359,939 keys in 5.001278 seconds or 9,069,670 gets per second
// - process_number=3 loop=3 prefetch_active=1 precalc_len_multiple=3 got 47,498,339 keys in 5.003657 seconds or 9,492,725 gets per second
// - process_number=6 loop=3 prefetch_active=1 precalc_len_multiple=3 got 43,343,768 keys in 5.006243 seconds or 8,657,943 gets per second
// - process_number=7 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,232,014 keys in 5.004614 seconds or 10,037,140 gets per second
// - process_number=5 loop=3 prefetch_active=1 precalc_len_multiple=3 got 47,429,411 keys in 5.000161 seconds or 9,485,577 gets per second
// - process_number=4 loop=3 prefetch_active=1 precalc_len_multiple=3 got 50,597,550 keys in 5.005670 seconds or 10,108,048 gets per second
// - process_number=1 ending
// - process_number=2 ending
// - process_number=8 ending
// - process_number=3 ending
// - process_number=6 ending
// - process_number=7 ending
// - process_number=5 ending
// - process_number=4 ending
//
// experiments with one process and changing the size of the bucket therefore relying on cache-line read-ahead
//
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 8,388,608 buckets of size 64 with 8 slots per bucket or 67,108,864 total slots
// - put 4,903,147 keys (before collision) in 0.790470 seconds or 6,202,826 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 77,117,221 keys in 5.002977 seconds or 15,414,267 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 104,504,487 keys in 5.002284 seconds or 20,891,354 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 124,248,147 keys in 5.000843 seconds or 24,845,441 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 124,117,075 keys in 5.000037 seconds or 24,823,232 gets per second
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 4,194,304 buckets of size 128 with 16 slots per bucket or 67,108,864 total slots
// - hit magic bucket 0; re-hashing key 10,253,294
// ...
// - hit magic bucket 0; re-hashing key 12,192,560
// - put 13,076,546 keys (before collision) in 1.570020 seconds or 8,328,904 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 63,740,520 keys in 5.001866 seconds or 12,743,348 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 95,630,126 keys in 5.000265 seconds or 19,125,011 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 106,806,128 keys in 5.000957 seconds or 21,357,138 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 109,427,568 keys in 5.001722 seconds or 21,877,978 gets per second
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 2,097,152 buckets of size 256 with 32 slots per bucket or 67,108,864 total slots
// - hit magic bucket 0; re-hashing key 9,003,110
// ...
// - hit magic bucket 0; re-hashing key 22,234,547
// - put 23,028,554 keys (before collision) in 2.939141 seconds or 7,835,131 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 56,377,332 keys in 5.004045 seconds or 11,266,352 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 75,866,942 keys in 5.001897 seconds or 15,167,634 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 94,242,440 keys in 5.000053 seconds or 18,848,288 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 92,014,216 keys in 5.000120 seconds or 18,402,402 gets per second
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 1,048,576 buckets of size 512 with 64 slots per bucket or 67,108,864 total slots
// - hit magic bucket 0; re-hashing key 3,933,621
// ...
// - hit magic bucket 0; re-hashing key 34,570,889
// - put 34,746,952 keys (before collision) in 5.457229 seconds or 6,367,142 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 40,348,584 keys in 5.001734 seconds or 8,066,919 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 58,502,056 keys in 5.006553 seconds or 11,685,097 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 57,519,016 keys in 5.005921 seconds or 11,490,196 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 59,091,880 keys in 5.000849 seconds or 11,816,370 gets per second
// - process_number=1 ending
// $ gcc -O3 -I. -o 1-cache-line 1-cache-line.c murmurhash3.c && ./1-cache-line
// - starting
// - mmap() 536,870,912 bytes or 512 MB containing 524,288 buckets of size 1024 with 128 slots per bucket or 67,108,864 total slots
// - hit magic bucket 0; re-hashing key 3,230,506
// ...
// - hit magic bucket 0; re-hashing key 42,292,122
// - put 43,103,254 keys (before collision) in 9.367549 seconds or 4,601,337 inserts per second
// - parent ending after fork()ing 1 kids
// - process_number=1 loop=0 prefetch_active=0 precalc_len_multiple=1 got 30,636,384 keys in 5.003317 seconds or 6,123,215 gets per second
// - process_number=1 loop=1 prefetch_active=1 precalc_len_multiple=1 got 38,828,384 keys in 5.000796 seconds or 7,764,441 gets per second
// - process_number=1 loop=2 prefetch_active=1 precalc_len_multiple=2 got 39,352,672 keys in 5.005588 seconds or 7,861,748 gets per second
// - process_number=1 loop=3 prefetch_active=1 precalc_len_multiple=3 got 39,156,064 keys in 5.006275 seconds or 7,821,397 gets per second
// - process_number=1 ending
/**
* Based on the MIT licensed [1], C++ function here [2].
* We only use the functions outputting a 128bit hash.
*
* [1]http://code.google.com/p/smhasher/
* [2]http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
*/
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#include "murmurhash3.h"
#define FORCE_INLINE __attribute__((always_inline)) inline
static inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
return (x << r) | (x >> (32 - r));
}
static inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
return (x << r) | (x >> (64 - r));
}
#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)
#define BIG_CONSTANT(x) (x##LLU)
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
static FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
{
return p[i];
}
static FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
return p[i];
}
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
static FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
//----------
static FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
k ^= k >> 33;
k *= BIG_CONSTANT(0xff51afd7ed558ccd);
k ^= k >> 33;
k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return k;
}
//-----------------------------------------------------------------------------
void MurmurHash3_x64_128 ( const void * key, const int len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64_t * blocks = (const uint64_t *)(data);
for(int i = 0; i < nblocks; i++)
{
uint64_t k1 = getblock64(blocks,i*2+0);
uint64_t k2 = getblock64(blocks,i*2+1);
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch(len & 15)
{
case 15: k2 ^= (uint64_t)(tail[14]) << 48;
case 14: k2 ^= (uint64_t)(tail[13]) << 40;
case 13: k2 ^= (uint64_t)(tail[12]) << 32;
case 12: k2 ^= (uint64_t)(tail[11]) << 24;
case 11: k2 ^= (uint64_t)(tail[10]) << 16;
case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
case 9: k2 ^= (uint64_t)(tail[ 8]) << 0;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= (uint64_t)(tail[ 7]) << 56;
case 7: k1 ^= (uint64_t)(tail[ 6]) << 48;
case 6: k1 ^= (uint64_t)(tail[ 5]) << 40;
case 5: k1 ^= (uint64_t)(tail[ 4]) << 32;
case 4: k1 ^= (uint64_t)(tail[ 3]) << 24;
case 3: k1 ^= (uint64_t)(tail[ 2]) << 16;
case 2: k1 ^= (uint64_t)(tail[ 1]) << 8;
case 1: k1 ^= (uint64_t)(tail[ 0]) << 0;
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 += h2;
h2 += h1;
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
#ifndef __MURMURHASH3_H__
#define __MURMURHASH3_H__
#include <stdint.h>
extern void MurmurHash3_x64_128(const void * key, const int len, const uint32_t seed, void * out);
#endif /* __MURMURHASH3_H__ */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment