Skip to content

Instantly share code, notes, and snippets.

@nishidy
Last active May 7, 2017 15:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nishidy/f1fd8003602371802d9b002f82622b3c to your computer and use it in GitHub Desktop.
Save nishidy/f1fd8003602371802d9b002f82622b3c to your computer and use it in GitHub Desktop.
Consistent Hashing in C++
#include <iostream>
#include <sstream>
#include <algorithm>
#include <vector>
#include <map>
#include <math.h>
//#include <list> # lower_bound does not work
#include <openssl/sha.h>
#include <assert.h>
using namespace std;
typedef unsigned int UI;
typedef unsigned long UL;
typedef string DATA;
typedef string HOSTNAME;
typedef UL HASH;
UI vnode_num;
enum Ope {
ADD = 0,
REMOVE,
};
class Node {
public:
map<HASH,DATA> storage;
string hostname;
Node(){}
Node(string hostname):hostname(hostname){}
void add_data(HASH hash, string data){
assert(storage.find(hash)==storage.end());
storage[hash] = data;
}
void stat(){
cout << hostname << ":" << storage.size() << endl;
}
UI get_size(){
return storage.size();
}
bool get_data_between(HASH pre_hash, HASH hash, pair<HASH,DATA>& p){
assert(pre_hash<hash);
for(auto data: storage){
if(pre_hash < data.first && data.first < hash){
p = data;
return true;
}
}
return false;
}
void remove_data(HASH hash){
storage.erase(hash);
}
};
typedef vector<UL> VL;
typedef vector<Node*> VNp;
typedef map<HOSTNAME,Node*> MHNp;
class CHasher {
VL hashidxtable;
VNp virnodetable;
MHNp realnodemap;
public:
CHasher(){}
HASH hasher(string s){
SHA256_CTX c;
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_Init(&c);
SHA256_Update(&c,s.c_str(),s.length());
SHA256_Final(hash,&c);
UL n=0;
for(auto i=0;i<SHA256_DIGEST_LENGTH;i+=8){
n+=((UL)hash[i+7]<<48)+((UL)hash[i+6]<<48)+((UL)hash[i+5]<<40)+((UL)hash[i+4]<<32)+
(hash[i+3]<<24)+(hash[i+2]<<16)+(hash[i+1]<<8)+hash[i];
}
return n;
}
UI find_node_idx(HASH hash){
VL::iterator low = lower_bound(hashidxtable.begin(),hashidxtable.end(),hash);
UI idx = low-hashidxtable.begin();
return idx;
}
void add_node(string hostname){
Node* node;
if(realnodemap.find(hostname)==realnodemap.end()){
node = new Node(hostname);
}else{
return;
}
realnodemap[hostname] = node;
stringstream ss;
for(UI i=0;i<vnode_num;i++){
ss << hostname << i;
HASH hash = hasher(ss.str());
UI idx = find_node_idx(hash);
hashidxtable.insert(hashidxtable.begin()+idx,hash);
virnodetable.insert(virnodetable.begin()+idx,node);
if(virnodetable.size()>1){
redistribute(idx, hash, ADD);
}
}
}
void add_data(string data){
HASH hash = hasher(data);
UI idx = find_node_idx(hash);
idx = idx == hashidxtable.size() ? 0 : idx;
Node* node = virnodetable[idx];
node->add_data(hash,data);
}
void remove_node(HOSTNAME hostname){
if(realnodemap.find(hostname)==realnodemap.end()){
return;
}
stringstream ss;
for(UI i=0;i<vnode_num;i++){
ss << hostname << i;
HASH hash = hasher(ss.str());
UI idx = find_node_idx(hash);
if(virnodetable.size()>1){
redistribute(idx, hash, REMOVE);
}
hashidxtable.erase(hashidxtable.begin()+idx);
virnodetable.erase(virnodetable.begin()+idx);
}
assert(realnodemap[hostname]->storage.size()==0);
realnodemap.erase(hostname);
}
void remove_data(){
}
void stat(){
UI size=0;
UI node_cnt=0;
for(auto node : realnodemap){
node_cnt++;
node.second->stat();
size += node.second->get_size();
}
cout << "Total data : " << size << endl;
UI ave = size/node_cnt;
double std = 0.0;
for(auto node : realnodemap){
std += pow((double)(node.second->get_size())-ave,2.0);
}
cout << "Standard deviation: " << sqrt(std/ave) << endl;
}
void redistribute(UI idx, HASH hash, enum Ope o){
Node* this_node = virnodetable[idx];
Node* next_node = get_next_node(idx);
HASH pre_hash = get_pre_hash(idx);
switch(o){
case ADD:
if(idx==0){
do_redistribute(next_node,this_node,pre_hash,ULONG_MAX);
do_redistribute(next_node,this_node,0,hash);
}else{
do_redistribute(next_node,this_node,pre_hash,hash);
}
break;
case REMOVE:
if(idx==0){
do_redistribute(this_node,next_node,pre_hash,ULONG_MAX);
do_redistribute(this_node,next_node,0,hash);
}else{
do_redistribute(this_node,next_node,pre_hash,hash);
}
break;
}
}
HASH get_pre_hash(UI idx){
UI pre_idx = idx==0?hashidxtable.size()-1:idx-1;
return hashidxtable[pre_idx];
}
Node* get_next_node(UI idx){
Node* this_node = virnodetable[idx];
Node* next_node;
UI flip=0;
do{
UI next_idx = idx==hashidxtable.size()-1?0:idx+1;
flip += next_idx==0?1:0;
next_node = virnodetable[next_idx];
idx = next_idx;
}while(this_node==next_node && flip<=1);
return next_node;
}
void do_redistribute(Node* from, Node* to, HASH pre_hash, HASH this_hash){
for(;;){
pair<HASH,DATA> p;
if(!(from->get_data_between(pre_hash,this_hash,p)))
break;
from->remove_data(p.first);
to->add_data(p.first, p.second);
}
}
};
vector<HOSTNAME> create_hostnames(string h,UI n){
vector<HOSTNAME> hostnames;
for(UI i=0;i<n;i++){
stringstream ss;
ss << h << "_" << i;
hostnames.push_back(ss.str());
}
return hostnames;
}
vector<DATA> create_data(string d,UI n){
vector<DATA> data;
for(UI i=0;i<n;i++){
stringstream ss;
ss << "This_is_" << d << i;
data.push_back(ss.str());
}
return data;
}
int main(int argc, char* argv[]){
CHasher chasher = CHasher();
UI node_cnt;
UI data_cnt;
if(argc>2){
node_cnt = atoi(argv[1]);
data_cnt = atoi(argv[2]);
}else{
node_cnt = 10;
data_cnt = 1000;
}
if(argc>3){
vnode_num = atoi(argv[3]);
}else{
vnode_num = 3;
}
cout << "# of virtual node " << vnode_num << endl << endl;
cout << node_cnt << " nodes added."<< endl;
vector<HOSTNAME> hostnames = create_hostnames("hostname",node_cnt);
for(auto hostname: hostnames){
chasher.add_node(hostname);
}
cout << data_cnt << " data added."<< endl;
vector<DATA> data = create_data("data",data_cnt);
for(auto data_: data){
chasher.add_data(data_);
}
chasher.stat();
cout << endl;
cout << data_cnt << " data added."<< endl;
vector<DATA> newdata = create_data("new_data",data_cnt);
for(auto data_: newdata){
chasher.add_data(data_);
}
cout << node_cnt << " nodes added."<< endl;
vector<HOSTNAME> newhostnames = create_hostnames("new_hostname",node_cnt);
for(auto hostname: newhostnames){
chasher.add_node(hostname);
}
chasher.stat();
cout << endl;
cout << node_cnt << " nodes removed."<< endl;
for(auto hostname: hostnames){
chasher.remove_node(hostname);
}
chasher.stat();
cout << endl;
return 0;
}
@nishidy
Copy link
Author

nishidy commented May 7, 2017

$ for vnodes in 5 10 50;do echo "===================="; ./consistent_hashing 20 100000 $vnodes; done
====================
# of virtual node 5

20 nodes added.
100000 data added.
hostname_0:4086
hostname_1:7441
hostname_10:4325
hostname_11:2634
hostname_12:6124
hostname_13:2903
hostname_14:4861
hostname_15:7268
hostname_16:10661
hostname_17:3407
hostname_18:4990
hostname_19:5413
hostname_2:2357
hostname_3:3977
hostname_4:6952
hostname_5:7974
hostname_6:3728
hostname_7:4711
hostname_8:2558
hostname_9:3630
Total data : 100000
Standard deviation: 133.158

100000 data added.
20 nodes added.
hostname_0:7569
hostname_1:7659
hostname_10:1651
hostname_11:2791
hostname_12:4848
hostname_13:3655
hostname_14:8228
hostname_15:2227
hostname_16:7483
hostname_17:5050
hostname_18:2284
hostname_19:3754
hostname_2:2654
hostname_3:5822
hostname_4:7266
hostname_5:11898
hostname_6:7470
hostname_7:2145
hostname_8:2884
hostname_9:5606
new_hostname_0:6778
new_hostname_1:3806
new_hostname_10:4189
new_hostname_11:5103
new_hostname_12:5113
new_hostname_13:3994
new_hostname_14:4797
new_hostname_15:4712
new_hostname_16:3975
new_hostname_17:7190
new_hostname_18:5322
new_hostname_19:2384
new_hostname_2:3052
new_hostname_3:6895
new_hostname_4:7255
new_hostname_5:5526
new_hostname_6:5599
new_hostname_7:3614
new_hostname_8:4532
new_hostname_9:3220
Total data : 200000
Standard deviation: 189.953

20 nodes removed.
new_hostname_0:12133
new_hostname_1:7445
new_hostname_10:8427
new_hostname_11:11858
new_hostname_12:6931
new_hostname_13:9185
new_hostname_14:9547
new_hostname_15:10155
new_hostname_16:7873
new_hostname_17:14558
new_hostname_18:11769
new_hostname_19:7875
new_hostname_2:11489
new_hostname_3:12328
new_hostname_4:9250
new_hostname_5:7016
new_hostname_6:18034
new_hostname_7:12031
new_hostname_8:6203
new_hostname_9:5893
Total data : 200000
Standard deviation: 133.161

====================
# of virtual node 10

20 nodes added.
100000 data added.
hostname_0:7444
hostname_1:7425
hostname_10:7260
hostname_11:3562
hostname_12:4675
hostname_13:1902
hostname_14:3951
hostname_15:4050
hostname_16:5810
hostname_17:5733
hostname_18:5735
hostname_19:5512
hostname_2:3531
hostname_3:2132
hostname_4:7543
hostname_5:7572
hostname_6:3744
hostname_7:5216
hostname_8:2720
hostname_9:4483
Total data : 100000
Standard deviation: 112.505

100000 data added.
20 nodes added.
hostname_0:8775
hostname_1:7321
hostname_10:4377
hostname_11:4400
hostname_12:3859
hostname_13:2820
hostname_14:5459
hostname_15:2481
hostname_16:5691
hostname_17:4820
hostname_18:4062
hostname_19:3833
hostname_2:4787
hostname_3:2775
hostname_4:5203
hostname_5:12245
hostname_6:6032
hostname_7:3619
hostname_8:4199
hostname_9:4705
new_hostname_0:6005
new_hostname_1:4938
new_hostname_10:4731
new_hostname_11:4210
new_hostname_12:5240
new_hostname_13:4597
new_hostname_14:6689
new_hostname_15:4489
new_hostname_16:2350
new_hostname_17:4460
new_hostname_18:5291
new_hostname_19:6697
new_hostname_2:5067
new_hostname_3:3888
new_hostname_4:4753
new_hostname_5:6640
new_hostname_6:2740
new_hostname_7:4811
new_hostname_8:5823
new_hostname_9:5118
Total data : 200000
Standard deviation: 156.285

20 nodes removed.
new_hostname_0:13649
new_hostname_1:8137
new_hostname_10:10660
new_hostname_11:7418
new_hostname_12:9242
new_hostname_13:7329
new_hostname_14:15038
new_hostname_15:11601
new_hostname_16:5908
new_hostname_17:10684
new_hostname_18:11782
new_hostname_19:13305
new_hostname_2:14217
new_hostname_3:7048
new_hostname_4:10909
new_hostname_5:9404
new_hostname_6:4865
new_hostname_7:13059
new_hostname_8:7634
new_hostname_9:8111
Total data : 200000
Standard deviation: 127.837

====================
# of virtual node 50

20 nodes added.
100000 data added.
hostname_0:4376
hostname_1:4674
hostname_10:4245
hostname_11:4634
hostname_12:4804
hostname_13:5806
hostname_14:4657
hostname_15:4233
hostname_16:4738
hostname_17:5738
hostname_18:6156
hostname_19:6325
hostname_2:5663
hostname_3:4561
hostname_4:5131
hostname_5:4360
hostname_6:4469
hostname_7:4862
hostname_8:4860
hostname_9:5708
Total data : 100000
Standard deviation: 40.447

100000 data added.
20 nodes added.
hostname_0:4229
hostname_1:4499
hostname_10:4554
hostname_11:5096
hostname_12:5192
hostname_13:5854
hostname_14:4057
hostname_15:5295
hostname_16:4950
hostname_17:4764
hostname_18:6531
hostname_19:6049
hostname_2:5677
hostname_3:4941
hostname_4:3998
hostname_5:5283
hostname_6:5151
hostname_7:5835
hostname_8:5206
hostname_9:5062
new_hostname_0:4391
new_hostname_1:5325
new_hostname_10:4615
new_hostname_11:4972
new_hostname_12:5687
new_hostname_13:5111
new_hostname_14:5125
new_hostname_15:4686
new_hostname_16:4979
new_hostname_17:4064
new_hostname_18:4485
new_hostname_19:4998
new_hostname_2:3773
new_hostname_3:5013
new_hostname_4:5245
new_hostname_5:4972
new_hostname_6:4315
new_hostname_7:6661
new_hostname_8:4504
new_hostname_9:4856
Total data : 200000
Standard deviation: 56.6357

20 nodes removed.
new_hostname_0:8579
new_hostname_1:11997
new_hostname_10:10671
new_hostname_11:9348
new_hostname_12:11011
new_hostname_13:8390
new_hostname_14:10171
new_hostname_15:9492
new_hostname_16:8481
new_hostname_17:10425
new_hostname_18:8117
new_hostname_19:10569
new_hostname_2:11132
new_hostname_3:8634
new_hostname_4:9893
new_hostname_5:11137
new_hostname_6:10681
new_hostname_7:11824
new_hostname_8:8972
new_hostname_9:10476
Total data : 200000
Standard deviation: 51.6403

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment