Last active
May 7, 2017 15:30
-
-
Save nishidy/f1fd8003602371802d9b002f82622b3c to your computer and use it in GitHub Desktop.
Consistent Hashing in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <sstream> | |
#include <algorithm> | |
#include <vector> | |
#include <map> | |
#include <math.h> | |
//#include <list> # lower_bound does not work | |
#include <openssl/sha.h> | |
#include <assert.h> | |
using namespace std; | |
typedef unsigned int UI; | |
typedef unsigned long UL; | |
typedef string DATA; | |
typedef string HOSTNAME; | |
typedef UL HASH; | |
UI vnode_num; | |
enum Ope { | |
ADD = 0, | |
REMOVE, | |
}; | |
class Node { | |
public: | |
map<HASH,DATA> storage; | |
string hostname; | |
Node(){} | |
Node(string hostname):hostname(hostname){} | |
void add_data(HASH hash, string data){ | |
assert(storage.find(hash)==storage.end()); | |
storage[hash] = data; | |
} | |
void stat(){ | |
cout << hostname << ":" << storage.size() << endl; | |
} | |
UI get_size(){ | |
return storage.size(); | |
} | |
bool get_data_between(HASH pre_hash, HASH hash, pair<HASH,DATA>& p){ | |
assert(pre_hash<hash); | |
for(auto data: storage){ | |
if(pre_hash < data.first && data.first < hash){ | |
p = data; | |
return true; | |
} | |
} | |
return false; | |
} | |
void remove_data(HASH hash){ | |
storage.erase(hash); | |
} | |
}; | |
typedef vector<UL> VL; | |
typedef vector<Node*> VNp; | |
typedef map<HOSTNAME,Node*> MHNp; | |
class CHasher { | |
VL hashidxtable; | |
VNp virnodetable; | |
MHNp realnodemap; | |
public: | |
CHasher(){} | |
HASH hasher(string s){ | |
SHA256_CTX c; | |
unsigned char hash[SHA256_DIGEST_LENGTH]; | |
SHA256_Init(&c); | |
SHA256_Update(&c,s.c_str(),s.length()); | |
SHA256_Final(hash,&c); | |
UL n=0; | |
for(auto i=0;i<SHA256_DIGEST_LENGTH;i+=8){ | |
n+=((UL)hash[i+7]<<48)+((UL)hash[i+6]<<48)+((UL)hash[i+5]<<40)+((UL)hash[i+4]<<32)+ | |
(hash[i+3]<<24)+(hash[i+2]<<16)+(hash[i+1]<<8)+hash[i]; | |
} | |
return n; | |
} | |
UI find_node_idx(HASH hash){ | |
VL::iterator low = lower_bound(hashidxtable.begin(),hashidxtable.end(),hash); | |
UI idx = low-hashidxtable.begin(); | |
return idx; | |
} | |
void add_node(string hostname){ | |
Node* node; | |
if(realnodemap.find(hostname)==realnodemap.end()){ | |
node = new Node(hostname); | |
}else{ | |
return; | |
} | |
realnodemap[hostname] = node; | |
stringstream ss; | |
for(UI i=0;i<vnode_num;i++){ | |
ss << hostname << i; | |
HASH hash = hasher(ss.str()); | |
UI idx = find_node_idx(hash); | |
hashidxtable.insert(hashidxtable.begin()+idx,hash); | |
virnodetable.insert(virnodetable.begin()+idx,node); | |
if(virnodetable.size()>1){ | |
redistribute(idx, hash, ADD); | |
} | |
} | |
} | |
void add_data(string data){ | |
HASH hash = hasher(data); | |
UI idx = find_node_idx(hash); | |
idx = idx == hashidxtable.size() ? 0 : idx; | |
Node* node = virnodetable[idx]; | |
node->add_data(hash,data); | |
} | |
void remove_node(HOSTNAME hostname){ | |
if(realnodemap.find(hostname)==realnodemap.end()){ | |
return; | |
} | |
stringstream ss; | |
for(UI i=0;i<vnode_num;i++){ | |
ss << hostname << i; | |
HASH hash = hasher(ss.str()); | |
UI idx = find_node_idx(hash); | |
if(virnodetable.size()>1){ | |
redistribute(idx, hash, REMOVE); | |
} | |
hashidxtable.erase(hashidxtable.begin()+idx); | |
virnodetable.erase(virnodetable.begin()+idx); | |
} | |
assert(realnodemap[hostname]->storage.size()==0); | |
realnodemap.erase(hostname); | |
} | |
void remove_data(){ | |
} | |
void stat(){ | |
UI size=0; | |
UI node_cnt=0; | |
for(auto node : realnodemap){ | |
node_cnt++; | |
node.second->stat(); | |
size += node.second->get_size(); | |
} | |
cout << "Total data : " << size << endl; | |
UI ave = size/node_cnt; | |
double std = 0.0; | |
for(auto node : realnodemap){ | |
std += pow((double)(node.second->get_size())-ave,2.0); | |
} | |
cout << "Standard deviation: " << sqrt(std/ave) << endl; | |
} | |
void redistribute(UI idx, HASH hash, enum Ope o){ | |
Node* this_node = virnodetable[idx]; | |
Node* next_node = get_next_node(idx); | |
HASH pre_hash = get_pre_hash(idx); | |
switch(o){ | |
case ADD: | |
if(idx==0){ | |
do_redistribute(next_node,this_node,pre_hash,ULONG_MAX); | |
do_redistribute(next_node,this_node,0,hash); | |
}else{ | |
do_redistribute(next_node,this_node,pre_hash,hash); | |
} | |
break; | |
case REMOVE: | |
if(idx==0){ | |
do_redistribute(this_node,next_node,pre_hash,ULONG_MAX); | |
do_redistribute(this_node,next_node,0,hash); | |
}else{ | |
do_redistribute(this_node,next_node,pre_hash,hash); | |
} | |
break; | |
} | |
} | |
HASH get_pre_hash(UI idx){ | |
UI pre_idx = idx==0?hashidxtable.size()-1:idx-1; | |
return hashidxtable[pre_idx]; | |
} | |
Node* get_next_node(UI idx){ | |
Node* this_node = virnodetable[idx]; | |
Node* next_node; | |
UI flip=0; | |
do{ | |
UI next_idx = idx==hashidxtable.size()-1?0:idx+1; | |
flip += next_idx==0?1:0; | |
next_node = virnodetable[next_idx]; | |
idx = next_idx; | |
}while(this_node==next_node && flip<=1); | |
return next_node; | |
} | |
void do_redistribute(Node* from, Node* to, HASH pre_hash, HASH this_hash){ | |
for(;;){ | |
pair<HASH,DATA> p; | |
if(!(from->get_data_between(pre_hash,this_hash,p))) | |
break; | |
from->remove_data(p.first); | |
to->add_data(p.first, p.second); | |
} | |
} | |
}; | |
vector<HOSTNAME> create_hostnames(string h,UI n){ | |
vector<HOSTNAME> hostnames; | |
for(UI i=0;i<n;i++){ | |
stringstream ss; | |
ss << h << "_" << i; | |
hostnames.push_back(ss.str()); | |
} | |
return hostnames; | |
} | |
vector<DATA> create_data(string d,UI n){ | |
vector<DATA> data; | |
for(UI i=0;i<n;i++){ | |
stringstream ss; | |
ss << "This_is_" << d << i; | |
data.push_back(ss.str()); | |
} | |
return data; | |
} | |
int main(int argc, char* argv[]){ | |
CHasher chasher = CHasher(); | |
UI node_cnt; | |
UI data_cnt; | |
if(argc>2){ | |
node_cnt = atoi(argv[1]); | |
data_cnt = atoi(argv[2]); | |
}else{ | |
node_cnt = 10; | |
data_cnt = 1000; | |
} | |
if(argc>3){ | |
vnode_num = atoi(argv[3]); | |
}else{ | |
vnode_num = 3; | |
} | |
cout << "# of virtual node " << vnode_num << endl << endl; | |
cout << node_cnt << " nodes added."<< endl; | |
vector<HOSTNAME> hostnames = create_hostnames("hostname",node_cnt); | |
for(auto hostname: hostnames){ | |
chasher.add_node(hostname); | |
} | |
cout << data_cnt << " data added."<< endl; | |
vector<DATA> data = create_data("data",data_cnt); | |
for(auto data_: data){ | |
chasher.add_data(data_); | |
} | |
chasher.stat(); | |
cout << endl; | |
cout << data_cnt << " data added."<< endl; | |
vector<DATA> newdata = create_data("new_data",data_cnt); | |
for(auto data_: newdata){ | |
chasher.add_data(data_); | |
} | |
cout << node_cnt << " nodes added."<< endl; | |
vector<HOSTNAME> newhostnames = create_hostnames("new_hostname",node_cnt); | |
for(auto hostname: newhostnames){ | |
chasher.add_node(hostname); | |
} | |
chasher.stat(); | |
cout << endl; | |
cout << node_cnt << " nodes removed."<< endl; | |
for(auto hostname: hostnames){ | |
chasher.remove_node(hostname); | |
} | |
chasher.stat(); | |
cout << endl; | |
return 0; | |
} | |
Author
nishidy
commented
May 7, 2017
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment