Last active
August 5, 2016 08:22
-
-
Save upbit/219d25fd449f80a8e997eb323fda0674 to your computer and use it in GitHub Desktop.
Google Jump Consistent Hash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <string> | |
using namespace std; | |
/**************************************************************** | |
* | |
* Hash Functions | |
* | |
****************************************************************/ | |
// MurmurHash2, 64-bit versions, by Austin Appleby | |
// https://sites.google.com/site/murmurhash/ | |
uint64_t MurmurHash2_x64(const void * key, int len, uint32_t seed) | |
{ | |
const uint64_t m = 0xc6a4a7935bd1e995; | |
const int r = 47; | |
uint64_t h = seed ^ (len * m); | |
const uint64_t * data = (const uint64_t *)key; | |
const uint64_t * end = data + (len/8); | |
while(data != end) | |
{ | |
uint64_t k = *data++; | |
k *= m; | |
k ^= k >> r; | |
k *= m; | |
h ^= k; | |
h *= m; | |
} | |
const uint8_t * data2 = (const uint8_t*)data; | |
switch(len & 7) | |
{ | |
case 7: h ^= ((uint64_t)data2[6]) << 48; | |
case 6: h ^= ((uint64_t)data2[5]) << 40; | |
case 5: h ^= ((uint64_t)data2[4]) << 32; | |
case 4: h ^= ((uint64_t)data2[3]) << 24; | |
case 3: h ^= ((uint64_t)data2[2]) << 16; | |
case 2: h ^= ((uint64_t)data2[1]) << 8; | |
case 1: h ^= ((uint64_t)data2[0]); | |
h *= m; | |
}; | |
h ^= h >> r; | |
h *= m; | |
h ^= h >> r; | |
return h; | |
} | |
// Jump Consistent Hash: http://arxiv.org/ftp/arxiv/papers/1406/1406.2294.pdf | |
// https://blog.helong.info/blog/2015/03/13/jump_consistent_hash/ | |
int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) { | |
int64_t b = -1, j = 0; | |
while (j < num_buckets) { | |
b = j; | |
key = key * 2862933555777941757ULL + 1; | |
j = (b + 1) * (double(1LL << 31) / double((key >> 33) + 1)); | |
} | |
return b; | |
} | |
/**************************************************************** | |
* | |
* TEST | |
* | |
****************************************************************/ | |
char *rand_cstring(const int len) | |
{ | |
static char buffer[256] = {0}; | |
int i; | |
for (i = 0; i < min(len,255); i++) | |
buffer[i] = 'a' + rand()%26;; | |
buffer[++i] = '\0'; | |
return buffer; | |
} | |
// 根据 value 生成 bucket_id | |
int get_bucket_id(const char *value, const int len, const int bucket_num) | |
{ | |
uint64_t key = MurmurHash2_x64(value, len, 0); | |
return JumpConsistentHash(key, bucket_num); | |
} | |
int main() | |
{ | |
srand( time(NULL) ); | |
// 模拟一致性哈希的分布场景,从 server_number1 变化为 server_number2 时,影响的key数目 | |
int server_number1 = 8; | |
int server_number2 = 9; | |
// 将10w视频id分布到N台机器上 | |
int *pbktSum1 = (int*)calloc(sizeof(int), server_number1); | |
int *pbktSum2 = (int*)calloc(sizeof(int), server_number2); | |
int loop_keys = 100000; | |
int miss_count = 0; | |
for (int i = 0; i < loop_keys; i++) | |
{ | |
int len = 11; | |
if (i % 2 == 0) len = 15; | |
const char *szId = rand_cstring(len); | |
int bucket1 = get_bucket_id(szId, len, server_number1); | |
pbktSum1[bucket1]++; | |
int bucket2 = get_bucket_id(szId, len, server_number2); | |
pbktSum2[bucket2]++; | |
if (bucket1 != bucket2) | |
{ | |
miss_count++; | |
// printf("%d: %s bucket=%d -> bucket=%d\n", miss_count, szId, bucket1, bucket2); | |
} | |
} | |
// 输出各桶的分布情况 | |
printf(">> server_number %d -> %d\n", server_number1, server_number2); | |
for (int i = 0; i < server_number1; i++) | |
printf("[%d/%d] %d\n", i+1, server_number1, pbktSum1[i]); | |
printf(">> cache miss %.2f%%\n", float(miss_count)/loop_keys*100); | |
for (int i = 0; i < server_number2; i++) | |
printf("[%d/%d] %d\n", i+1, server_number2, pbktSum2[i]); | |
free(pbktSum1); | |
free(pbktSum2); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment