Last active
August 29, 2015 14:05
-
-
Save bsdoliv/2bc909afb7314e554354 to your computer and use it in GitHub Desktop.
relayd-consistent-hash.diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/relayd/parse.y b/relayd/parse.y | |
index 3391560..34e1021 100644 | |
--- a/relayd/parse.y | |
+++ b/relayd/parse.y | |
@@ -1,4 +1,4 @@ | |
-/* $OpenBSD: parse.y,v 1.187 2014/07/11 17:35:16 reyk Exp $ */ | |
+/* $OpenBSD: parse.y,v 1.188 2014/08/29 09:03:36 blambert Exp $ */ | |
/* | |
* Copyright (c) 2007 - 2014 Reyk Floeter <reyk@openbsd.org> | |
@@ -115,6 +115,7 @@ static int tagged = 0; | |
static int tag = 0; | |
static in_port_t tableport = 0; | |
static int dstmode; | |
+static u_int32_t hashseed = 0; | |
static enum key_type keytype = KEY_TYPE_NONE; | |
static enum direction dir = RELAY_DIR_ANY; | |
static char *rulefile = NULL; | |
@@ -168,7 +169,7 @@ typedef struct { | |
%token SOCKET SPLICE SSL STICKYADDR STYLE TABLE TAG TAGGED TCP TIMEOUT TO | |
%token ROUTER RTLABEL TRANSPARENT TRAP UPDATES URL VIRTUAL WITH TTL RTABLE | |
%token MATCH PARAMS RANDOM LEASTSTATES SRCHASH KEY CERTIFICATE PASSWORD ECDH | |
-%token EDH CURVE | |
+%token	EDH CURVE CONSISTHASH SEED /* SEED: "seed" keyword consumed by the hashseed rule */ | |
%token <v.string> STRING | |
%token <v.number> NUMBER | |
%type <v.string> hostname interface table value optstring | |
@@ -177,7 +178,7 @@ typedef struct { | |
%type <v.number> optssl optsslclient sslcache | |
%type <v.number> redirect_proto relay_proto match | |
%type <v.number> action ruleaf key_option | |
-%type <v.number> ssldhparams sslecdhcurve | |
+%type <v.number> ssldhparams sslecdhcurve hashseed | |
%type <v.port> port | |
%type <v.host> host | |
%type <v.addr> address | |
@@ -728,8 +729,9 @@ tableopts : CHECK tablecheck | |
table->conf.skip_cnt = | |
($2 / conf->sc_interval.tv_sec) - 1; | |
} | |
- | MODE dstmode { | |
+ | MODE dstmode hashseed { | |
switch ($2) { | |
+ case RELAY_DSTMODE_CONSISTHASH: | |
case RELAY_DSTMODE_LOADBALANCE: | |
case RELAY_DSTMODE_HASH: | |
case RELAY_DSTMODE_SRCHASH: | |
@@ -739,6 +741,7 @@ tableopts : CHECK tablecheck | |
"for redirections"); | |
YYERROR; | |
} | |
+ table->conf.hash_seed = $3; | |
/* FALLTHROUGH */ | |
case RELAY_DSTMODE_ROUNDROBIN: | |
dstmode = $2; | |
@@ -753,8 +756,33 @@ tableopts : CHECK tablecheck | |
break; | |
} | |
} | |
+		| MODE dstmode KEY STRING		{ | |
+			/* | |
+			 * Explicit hash seed for a consistent-hash table, | |
+			 * written as a "0x"-prefixed hexadecimal string.  The | |
+			 * parsed value is stored in the table configuration | |
+			 * and also replaces the file-scope default in | |
+			 * hashseed.  The string is freed on every path. | |
+			 */ | |
+			unsigned long long	 seed; | |
+			char			*end; | |
+ | |
+			if ($2 != RELAY_DSTMODE_CONSISTHASH) { | |
+				yyerror("mode does not requires a seed " | |
+				    "key"); | |
+				free($4); | |
+				YYERROR; | |
+			} | |
+			if (!($4[0] == '0' && $4[1] == 'x')) { | |
+				yyerror("key must be an hex number"); | |
+				free($4); | |
+				YYERROR; | |
+			} | |
+			/* | |
+			 * Parse into a type wider than the seed so that a key | |
+			 * with more than 32 significant bits is rejected | |
+			 * instead of being silently truncated on assignment. | |
+			 */ | |
+			seed = strtoull($4, &end, 16); | |
+			if (*end != '\0' || seed > 0xffffffffULL) { | |
+				yyerror("illegal key value %s", $4); | |
+				free($4); | |
+				YYERROR; | |
+			} | |
+			hashseed = (u_int32_t)seed; | |
+			table->conf.hash_seed = hashseed; | |
+			free($4); | |
+		} | |
; | |
+/* | |
+ * Optional seed following "mode <dstmode>": with no argument the file-scope | |
+ * hashseed value is used, otherwise the seed is the hash of the given string. | |
+ */ | |
+hashseed	: /* nothing */		{ $$ = hashseed; } | |
+		| SEED STRING		{ | |
+			$$ = hash32_str($2, HASHINIT); | |
+			/* | |
+			 * Only the hash is kept; release the string as the | |
+			 * other actions in this grammar do with theirs. | |
+			 */ | |
+			free($2); | |
+		} | |
+		; | |
+ | |
tablecheck : ICMP { table->conf.check = CHECK_ICMP; } | |
| TCP { table->conf.check = CHECK_TCP; } | |
| SSL { | |
@@ -1727,6 +1755,7 @@ forwardspec : STRING port retry { | |
rlt->rlt_table->conf.flags |= F_USED; | |
rlt->rlt_mode = dstmode; | |
rlt->rlt_flags = F_USED; | |
+ rlt->rlt_key = rlt->rlt_table->conf.hash_seed; | |
if (!TAILQ_EMPTY(&rlay->rl_tables)) | |
rlt->rlt_flags |= F_BACKUP; | |
@@ -1740,6 +1769,7 @@ dstmode : /* empty */ { $$ = RELAY_DSTMODE_DEFAULT; } | |
| HASH { $$ = RELAY_DSTMODE_HASH; } | |
| LEASTSTATES { $$ = RELAY_DSTMODE_LEASTSTATES; } | |
| SRCHASH { $$ = RELAY_DSTMODE_SRCHASH; } | |
+ | CONSISTHASH { $$ = RELAY_DSTMODE_CONSISTHASH; } | |
| RANDOM { $$ = RELAY_DSTMODE_RANDOM; } | |
; | |
@@ -1875,6 +1905,7 @@ routeoptsl : ROUTE address '/' NUMBER { | |
} | |
free($2); | |
} | |
+ | DISABLE { rlay->rl_conf.flags |= F_DISABLE; } | |
| include | |
; | |
@@ -2082,6 +2113,7 @@ lookup(char *s) | |
{ "check", CHECK }, | |
{ "ciphers", CIPHERS }, | |
{ "code", CODE }, | |
+ { "consistent-hash", CONSISTHASH }, | |
{ "cookie", COOKIE }, | |
{ "curve", CURVE }, | |
{ "demote", DEMOTE }, | |
@@ -2149,6 +2181,7 @@ lookup(char *s) | |
{ "rtlabel", RTLABEL }, | |
{ "sack", SACK }, | |
{ "script", SCRIPT }, | |
+ { "seed", SEED }, | |
{ "send", SEND }, | |
{ "session", SESSION }, | |
{ "set", SET }, | |
@@ -2543,6 +2576,9 @@ load_config(const char *filename, struct relayd *x_conf) | |
conf = x_conf; | |
conf->sc_flags = 0; | |
+ while (hashseed == 0) | |
+ hashseed = arc4random(); | |
+ | |
loadcfg = 1; | |
errors = 0; | |
last_host_id = last_table_id = last_rdr_id = last_proto_id = | |
@@ -3110,6 +3146,7 @@ relay_inherit(struct relay *ra, struct relay *rb) | |
} | |
rtb->rlt_table = rta->rlt_table; | |
rtb->rlt_mode = rta->rlt_mode; | |
+ rtb->rlt_key = rta->rlt_key; | |
rtb->rlt_flags = rta->rlt_flags; | |
TAILQ_INSERT_TAIL(&rb->rl_tables, rtb, rlt_entry); | |
diff --git a/relayd/relay.c b/relayd/relay.c | |
index b5d5f8f..8bb4230 100644 | |
--- a/relayd/relay.c | |
+++ b/relayd/relay.c | |
@@ -1,4 +1,4 @@ | |
-/* $OpenBSD: relay.c,v 1.175 2014/07/14 00:11:12 bluhm Exp $ */ | |
+/* $OpenBSD: relay.c,v 1.176 2014/08/29 09:03:36 blambert Exp $ */ | |
/* | |
* Copyright (c) 2006 - 2014 Reyk Floeter <reyk@openbsd.org> | |
@@ -87,6 +87,11 @@ void relay_ssl_readcb(int, short, void *); | |
void relay_ssl_writecb(int, short, void *); | |
char *relay_load_file(const char *, off_t *); | |
+void relay_hashring_assign(struct host *, struct relay_table *); | |
+int relay_hashring_lookup(u_int32_t, struct table *); | |
+void relay_hashring_update(struct table *); | |
+u_int32_t relay_hashring_hash(u_int32_t); | |
+static int relay_hashring_cmp(const void *, const void *); | |
extern void bufferevent_read_pressure_cb(struct evbuffer *, size_t, | |
size_t, void *); | |
@@ -429,9 +434,13 @@ relay_launch(void) | |
break; | |
case RELAY_DSTMODE_LOADBALANCE: | |
case RELAY_DSTMODE_HASH: | |
+ case RELAY_DSTMODE_CONSISTHASH: | |
+ rlt->rlt_table->conf.rlay_mode = rlt->rlt_mode; | |
+ /* FALLTHROUGH */ | |
case RELAY_DSTMODE_SRCHASH: | |
rlt->rlt_key = | |
- hash32_str(rlay->rl_conf.name, HASHINIT); | |
+ hash32_str(rlay->rl_conf.name, | |
+ rlt->rlt_key); | |
rlt->rlt_key = | |
hash32_str(rlt->rlt_table->conf.name, | |
rlt->rlt_key); | |
@@ -442,8 +451,13 @@ relay_launch(void) | |
if (rlt->rlt_nhosts >= RELAY_MAXHOSTS) | |
fatal("relay_init: " | |
"too many hosts in table"); | |
+ host->ringkey = relay_hash_addr(&host->conf.ss, | |
+ HASHINIT); | |
host->idx = rlt->rlt_nhosts; | |
rlt->rlt_host[rlt->rlt_nhosts++] = host; | |
+ relay_hashring_assign(host, rlt); | |
+ log_info("hashring host %s key 0x%08x", | |
+ host->conf.name, host->ringkey); | |
} | |
log_info("adding %d hosts from table %s%s", | |
rlt->rlt_nhosts, rlt->rlt_table->conf.name, | |
@@ -1232,23 +1246,29 @@ relay_from_table(struct rsession *con) | |
idx = (int)arc4random_uniform(rlt->rlt_nhosts); | |
break; | |
case RELAY_DSTMODE_SRCHASH: | |
- case RELAY_DSTMODE_LOADBALANCE: | |
/* Source IP address without port */ | |
p = relay_hash_addr(&con->se_in.ss, p); | |
- if (rlt->rlt_mode == RELAY_DSTMODE_SRCHASH) | |
- break; | |
- /* FALLTHROUGH */ | |
+ idx = relay_hashring_lookup(p, table); | |
+ break; | |
+ case RELAY_DSTMODE_LOADBALANCE: | |
case RELAY_DSTMODE_HASH: | |
+ /* Source IP address without port */ | |
+ p = relay_hash_addr(&con->se_in.ss, p); | |
/* Local "destination" IP address and port */ | |
p = relay_hash_addr(&rlay->rl_conf.ss, p); | |
p = hash32_buf(&rlay->rl_conf.port, | |
sizeof(rlay->rl_conf.port), p); | |
+ idx = relay_hashring_lookup(p, table); | |
+ break; | |
+ case RELAY_DSTMODE_CONSISTHASH: | |
+ p = relay_hashring_hash(p); | |
+ idx = relay_hashring_lookup(p, table); | |
break; | |
default: | |
fatalx("relay_from_table: unsupported mode"); | |
/* NOTREACHED */ | |
} | |
- if (idx == -1 && (idx = p % rlt->rlt_nhosts) >= RELAY_MAXHOSTS) | |
+ if (idx == -1) | |
return (-1); | |
host = rlt->rlt_host[idx]; | |
DPRINTF("%s: session %d: table %s host %s, p 0x%08x, idx %d", | |
@@ -1698,6 +1718,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) | |
table->up--; | |
host->flags |= F_DISABLE; | |
host->up = HOST_UNKNOWN; | |
+ relay_hashring_update(table); | |
break; | |
case IMSG_HOST_ENABLE: | |
memcpy(&id, imsg->data, sizeof(id)); | |
@@ -1714,6 +1735,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) | |
table->up = 0; | |
TAILQ_FOREACH(host, &table->hosts, entry) | |
host->up = HOST_UNKNOWN; | |
+ relay_hashring_update(table); | |
break; | |
case IMSG_TABLE_ENABLE: | |
memcpy(&id, imsg->data, sizeof(id)); | |
@@ -1723,6 +1745,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) | |
table->up = 0; | |
TAILQ_FOREACH(host, &table->hosts, entry) | |
host->up = HOST_UNKNOWN; | |
+ relay_hashring_update(table); | |
break; | |
case IMSG_HOST_STATUS: | |
IMSG_SIZE_CHECK(imsg, &st); | |
@@ -1755,6 +1778,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) | |
else | |
table->up--; | |
host->up = st.up; | |
+ relay_hashring_update(table); | |
break; | |
case IMSG_NATLOOK: | |
bcopy(imsg->data, &cnl, sizeof(cnl)); | |
@@ -2649,6 +2673,111 @@ relay_load_certfiles(struct relay *rlay) | |
return (0); | |
} | |
+/* | |
+ * qsort(3) comparison callback for struct host_ring entries.  Orders two | |
+ * ring slots by ascending ringkey: returns -1, 1 or 0 when the first key is | |
+ * smaller than, larger than or equal to the second. | |
+ */ | |
+static int | |
+relay_hashring_cmp(const void *aa, const void *bb) | |
+{ | |
+	const struct host_ring	*lhs = aa, *rhs = bb; | |
+ | |
+	/* difference of two 0/1 comparisons: no subtraction of the keys */ | |
+	return ((lhs->ringkey > rhs->ringkey) - | |
+	    (lhs->ringkey < rhs->ringkey)); | |
+} | |
+ | |
+/* | |
+ * Map a 32-bit hash key onto the table's host ring and return the idx of | |
+ * the host whose ringkey is closest to it, with the key space wrapping | |
+ * around between the last and the first slot.  The ring is expected to be | |
+ * sorted by ascending ringkey, as relay_hashring_update() leaves it. | |
+ * | |
+ * Returns -1 when the table has no host up or the ring holds no entries. | |
+ */ | |
+int | |
+relay_hashring_lookup(u_int32_t key, struct table *table) | |
+{ | |
+	struct host_ring	*r; | |
+	int			 n = table->nhosts; | |
+ | |
+	/* | |
+	 * Reject an empty ring up front: the scan below pre-decrements n | |
+	 * and would otherwise start at host_ring[-1] and keep going down. | |
+	 */ | |
+	if (!table->up || n <= 0) | |
+		return (-1); | |
+ | |
+	/* | |
+	 * Walk down from the highest slot until one with a key strictly | |
+	 * below ours is found, or slot 0 is reached. | |
+	 */ | |
+	do { | |
+		r = &table->host_ring[--n]; | |
+		if (key > r->ringkey) | |
+			break; | |
+	} while (n); | |
+	if (n == 0 && key < r->ringkey) { | |
+		/* 0 < key < ring[0].key: pick first slot or wrap to last */ | |
+		n = table->nhosts - 1; | |
+		if ((table->host_ring[0].ringkey - key) <= | |
+		    ((UINT32_MAX - table->host_ring[n].ringkey) + key)) { | |
+			n = 0; | |
+		} | |
+	} else if (n == (table->nhosts - 1)) { | |
+		/* UINT32_MAX > key > ring[nhosts - 1].key: last or wrap to 0 */ | |
+		if ((table->host_ring[0].ringkey + (UINT32_MAX - key)) <= | |
+		    (key - table->host_ring[n].ringkey)) { | |
+			n = 0; | |
+		} | |
+	} else { | |
+		/* key lies between slots n and n + 1: take the nearer one */ | |
+		if ((table->host_ring[n + 1].ringkey - key) <= | |
+		    (key - table->host_ring[n].ringkey)) { | |
+			n++; | |
+		} | |
+	} | |
+	r = &table->host_ring[n]; | |
+ | |
+	return (r->host->idx); | |
+} | |
+ | |
+/* | |
+ * Rebuild the table's host ring from the hosts that are currently HOST_UP. | |
+ * Does nothing unless the table is configured for consistent-hash mode and | |
+ * its count of up hosts differs from the count seen at the last rebuild. | |
+ * On return the ring holds table->nhosts entries sorted by ascending | |
+ * ringkey; nhosts is 0 whenever the ring has been cleared. | |
+ */ | |
+void | |
+relay_hashring_update(struct table *table) | |
+{ | |
+	struct host	*host; | |
+	int		 nhosts = 0; | |
+ | |
+	if (table->conf.rlay_mode != RELAY_DSTMODE_CONSISTHASH) | |
+		return; | |
+ | |
+	if (table->up == table->lastup) | |
+		return; | |
+ | |
+	table->lastup = table->up; | |
+	memset(table->host_ring, 0, sizeof(table->host_ring)); | |
+	/* keep the entry count in step with the cleared ring on every path */ | |
+	table->nhosts = 0; | |
+ | |
+	if (!table->up) | |
+		return; | |
+ | |
+	TAILQ_FOREACH(host, &table->hosts, entry) { | |
+		if (host->up != HOST_UP) | |
+			continue; | |
+		table->host_ring[nhosts].host = host; | |
+		table->host_ring[nhosts].ringkey = host->ringkey; | |
+		nhosts++; | |
+	} | |
+ | |
+	table->nhosts = nhosts; | |
+	if (!nhosts) | |
+		return; | |
+ | |
+	/* relay_hashring_lookup() scans the ring in ringkey order */ | |
+	qsort(table->host_ring, nhosts, sizeof(struct host_ring), | |
+	    relay_hashring_cmp); | |
+} | |
+ | |
+/* | |
+ * Compute the ring position of host h for relay table rlt and store it in | |
+ * h->ringkey.  The position is seeded with rlt->rlt_key, folds in the | |
+ * host's idx and its address, and is finally passed through | |
+ * relay_hashring_hash(). | |
+ */ | |
+void | |
+relay_hashring_assign(struct host *h, struct relay_table *rlt) | |
+{ | |
+	u_int32_t	 pos; | |
+ | |
+	pos = hash32_buf(&h->idx, sizeof(h->idx), rlt->rlt_key); | |
+	pos = relay_hash_addr(&h->conf.ss, pos); | |
+	h->ringkey = relay_hashring_hash(pos); | |
+} | |
+ | |
+/* | |
+ * Central entry point for the hash function applied to ring keys, so the | |
+ * function of choice can be swapped in one place.  Currently hashes the | |
+ * bytes of a with hash32_buf(), using a itself as the initial value. | |
+ */ | |
+u_int32_t | |
+relay_hashring_hash(u_int32_t a) | |
+{ | |
+	return (hash32_buf(&a, sizeof(a), a)); | |
+} | |
+ | |
int | |
relay_session_cmp(struct rsession *a, struct rsession *b) | |
{ | |
diff --git a/relayd/relayd.h b/relayd/relayd.h | |
index 622f871..ed418a2 100644 | |
--- a/relayd/relayd.h | |
+++ b/relayd/relayd.h | |
@@ -1,4 +1,4 @@ | |
-/* $OpenBSD: relayd.h,v 1.189 2014/07/14 00:11:12 bluhm Exp $ */ | |
+/* $OpenBSD: relayd.h,v 1.191 2014/08/29 09:03:36 blambert Exp $ */ | |
/* | |
* Copyright (c) 2006 - 2014 Reyk Floeter <reyk@openbsd.org> | |
@@ -395,11 +395,17 @@ struct host { | |
u_long up_cnt; | |
int retry_cnt; | |
int idx; | |
+ u_int32_t ringkey; | |
u_int16_t he; | |
struct ctl_tcp_event cte; | |
}; | |
TAILQ_HEAD(hostlist, host); | |
+struct host_ring { /* one slot of a table's host_ring[] array */ | |
+ struct host *host; /* host occupying this slot */ | |
+ u_int32_t ringkey; /* copy of host->ringkey; relay.c sorts the slots by it */ | |
+}; | |
+ | |
enum host_error { | |
HCE_NONE = 0, | |
HCE_ABORT, | |
@@ -462,6 +468,8 @@ struct table_config { | |
char digest[41]; /* length of sha1 digest * 2 */ | |
u_int8_t digest_type; | |
enum forwardmode fwdmode; | |
+ int rlay_mode; | |
+ u_int32_t hash_seed; | |
}; | |
struct table { | |
@@ -470,6 +478,9 @@ struct table { | |
int up; | |
int skipped; | |
struct hostlist hosts; | |
+ struct host_ring host_ring[RELAY_MAXHOSTS]; | |
+ int nhosts; | |
+ int lastup; | |
SSL_CTX *ssl_ctx; | |
char *sendbuf; | |
}; | |
@@ -774,6 +785,7 @@ enum dstmode { | |
RELAY_DSTMODE_LOADBALANCE = 0, | |
RELAY_DSTMODE_ROUNDROBIN, | |
RELAY_DSTMODE_HASH, | |
+ RELAY_DSTMODE_CONSISTHASH, | |
RELAY_DSTMODE_SRCHASH, | |
RELAY_DSTMODE_LEASTSTATES, | |
RELAY_DSTMODE_RANDOM | |
@@ -976,6 +988,7 @@ struct privsep { | |
struct event ps_evsigchld; | |
struct event ps_evsighup; | |
struct event ps_evsigpipe; | |
+ struct event ps_evsigusr1; | |
int ps_noaction; | |
struct passwd *ps_pw; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment