Skip to content

Instantly share code, notes, and snippets.

@bsdoliv
Last active August 29, 2015 14:05
Show Gist options
  • Save bsdoliv/2bc909afb7314e554354 to your computer and use it in GitHub Desktop.
Save bsdoliv/2bc909afb7314e554354 to your computer and use it in GitHub Desktop.
relayd-consistent-hash.diff
diff --git a/relayd/parse.y b/relayd/parse.y
index 3391560..34e1021 100644
--- a/relayd/parse.y
+++ b/relayd/parse.y
@@ -1,4 +1,4 @@
-/* $OpenBSD: parse.y,v 1.187 2014/07/11 17:35:16 reyk Exp $ */
+/* $OpenBSD: parse.y,v 1.188 2014/08/29 09:03:36 blambert Exp $ */
/*
* Copyright (c) 2007 - 2014 Reyk Floeter <reyk@openbsd.org>
@@ -115,6 +115,7 @@ static int tagged = 0;
static int tag = 0;
static in_port_t tableport = 0;
static int dstmode;
+static u_int32_t hashseed = 0;
static enum key_type keytype = KEY_TYPE_NONE;
static enum direction dir = RELAY_DIR_ANY;
static char *rulefile = NULL;
@@ -168,7 +169,7 @@ typedef struct {
%token SOCKET SPLICE SSL STICKYADDR STYLE TABLE TAG TAGGED TCP TIMEOUT TO
%token ROUTER RTLABEL TRANSPARENT TRAP UPDATES URL VIRTUAL WITH TTL RTABLE
%token MATCH PARAMS RANDOM LEASTSTATES SRCHASH KEY CERTIFICATE PASSWORD ECDH
-%token EDH CURVE
+%token EDH CURVE CONSISTHASH
%token <v.string> STRING
%token <v.number> NUMBER
%type <v.string> hostname interface table value optstring
@@ -177,7 +178,7 @@ typedef struct {
%type <v.number> optssl optsslclient sslcache
%type <v.number> redirect_proto relay_proto match
%type <v.number> action ruleaf key_option
-%type <v.number> ssldhparams sslecdhcurve
+%type <v.number> ssldhparams sslecdhcurve hashseed
%type <v.port> port
%type <v.host> host
%type <v.addr> address
@@ -728,8 +729,9 @@ tableopts : CHECK tablecheck
table->conf.skip_cnt =
($2 / conf->sc_interval.tv_sec) - 1;
}
- | MODE dstmode {
+ | MODE dstmode hashseed {
switch ($2) {
+ case RELAY_DSTMODE_CONSISTHASH:
case RELAY_DSTMODE_LOADBALANCE:
case RELAY_DSTMODE_HASH:
case RELAY_DSTMODE_SRCHASH:
@@ -739,6 +741,7 @@ tableopts : CHECK tablecheck
"for redirections");
YYERROR;
}
+ table->conf.hash_seed = $3;
/* FALLTHROUGH */
case RELAY_DSTMODE_ROUNDROBIN:
dstmode = $2;
@@ -753,8 +756,33 @@ tableopts : CHECK tablecheck
break;
}
}
+ | MODE dstmode KEY STRING {
+ char *end;
+ if ($2 != RELAY_DSTMODE_CONSISTHASH) {
+ yyerror("mode does not requires a seed "
+ "key");
+ free($4);
+ YYERROR;
+ }
+ if (!($4[0] == '0' && $4[1] == 'x')) {
+ yyerror("key must be an hex number");
+ free($4);
+ YYERROR;
+ }
+ hashseed = strtoul($4, &end, 16);
+ if (*end != '\0') {
+ yyerror("illegal key value %s", $4);
+ free($4);
+ YYERROR;
+ }
+ table->conf.hash_seed = hashseed;
+ free($4);
+ }
;
+hashseed : /* nothing */ { $$ = hashseed; }
+ | SEED STRING { $$ = hash32_str($2, HASHINIT); }
+
tablecheck : ICMP { table->conf.check = CHECK_ICMP; }
| TCP { table->conf.check = CHECK_TCP; }
| SSL {
@@ -1727,6 +1755,7 @@ forwardspec : STRING port retry {
rlt->rlt_table->conf.flags |= F_USED;
rlt->rlt_mode = dstmode;
rlt->rlt_flags = F_USED;
+ rlt->rlt_key = rlt->rlt_table->conf.hash_seed;
if (!TAILQ_EMPTY(&rlay->rl_tables))
rlt->rlt_flags |= F_BACKUP;
@@ -1740,6 +1769,7 @@ dstmode : /* empty */ { $$ = RELAY_DSTMODE_DEFAULT; }
| HASH { $$ = RELAY_DSTMODE_HASH; }
| LEASTSTATES { $$ = RELAY_DSTMODE_LEASTSTATES; }
| SRCHASH { $$ = RELAY_DSTMODE_SRCHASH; }
+ | CONSISTHASH { $$ = RELAY_DSTMODE_CONSISTHASH; }
| RANDOM { $$ = RELAY_DSTMODE_RANDOM; }
;
@@ -1875,6 +1905,7 @@ routeoptsl : ROUTE address '/' NUMBER {
}
free($2);
}
+ | DISABLE { rlay->rl_conf.flags |= F_DISABLE; }
| include
;
@@ -2082,6 +2113,7 @@ lookup(char *s)
{ "check", CHECK },
{ "ciphers", CIPHERS },
{ "code", CODE },
+ { "consistent-hash", CONSISTHASH },
{ "cookie", COOKIE },
{ "curve", CURVE },
{ "demote", DEMOTE },
@@ -2149,6 +2181,7 @@ lookup(char *s)
{ "rtlabel", RTLABEL },
{ "sack", SACK },
{ "script", SCRIPT },
+ { "seed", SEED },
{ "send", SEND },
{ "session", SESSION },
{ "set", SET },
@@ -2543,6 +2576,9 @@ load_config(const char *filename, struct relayd *x_conf)
conf = x_conf;
conf->sc_flags = 0;
+ while (hashseed == 0)
+ hashseed = arc4random();
+
loadcfg = 1;
errors = 0;
last_host_id = last_table_id = last_rdr_id = last_proto_id =
@@ -3110,6 +3146,7 @@ relay_inherit(struct relay *ra, struct relay *rb)
}
rtb->rlt_table = rta->rlt_table;
rtb->rlt_mode = rta->rlt_mode;
+ rtb->rlt_key = rta->rlt_key;
rtb->rlt_flags = rta->rlt_flags;
TAILQ_INSERT_TAIL(&rb->rl_tables, rtb, rlt_entry);
diff --git a/relayd/relay.c b/relayd/relay.c
index b5d5f8f..8bb4230 100644
--- a/relayd/relay.c
+++ b/relayd/relay.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: relay.c,v 1.175 2014/07/14 00:11:12 bluhm Exp $ */
+/* $OpenBSD: relay.c,v 1.176 2014/08/29 09:03:36 blambert Exp $ */
/*
* Copyright (c) 2006 - 2014 Reyk Floeter <reyk@openbsd.org>
@@ -87,6 +87,11 @@ void relay_ssl_readcb(int, short, void *);
void relay_ssl_writecb(int, short, void *);
char *relay_load_file(const char *, off_t *);
+void relay_hashring_assign(struct host *, struct relay_table *);
+int relay_hashring_lookup(u_int32_t, struct table *);
+void relay_hashring_update(struct table *);
+u_int32_t relay_hashring_hash(u_int32_t);
+static int relay_hashring_cmp(const void *, const void *);
extern void bufferevent_read_pressure_cb(struct evbuffer *, size_t,
size_t, void *);
@@ -429,9 +434,13 @@ relay_launch(void)
break;
case RELAY_DSTMODE_LOADBALANCE:
case RELAY_DSTMODE_HASH:
+ case RELAY_DSTMODE_CONSISTHASH:
+ rlt->rlt_table->conf.rlay_mode = rlt->rlt_mode;
+ /* FALLTHROUGH */
case RELAY_DSTMODE_SRCHASH:
rlt->rlt_key =
- hash32_str(rlay->rl_conf.name, HASHINIT);
+ hash32_str(rlay->rl_conf.name,
+ rlt->rlt_key);
rlt->rlt_key =
hash32_str(rlt->rlt_table->conf.name,
rlt->rlt_key);
@@ -442,8 +451,13 @@ relay_launch(void)
if (rlt->rlt_nhosts >= RELAY_MAXHOSTS)
fatal("relay_init: "
"too many hosts in table");
+ host->ringkey = relay_hash_addr(&host->conf.ss,
+ HASHINIT);
host->idx = rlt->rlt_nhosts;
rlt->rlt_host[rlt->rlt_nhosts++] = host;
+ relay_hashring_assign(host, rlt);
+ log_info("hashring host %s key 0x%08x",
+ host->conf.name, host->ringkey);
}
log_info("adding %d hosts from table %s%s",
rlt->rlt_nhosts, rlt->rlt_table->conf.name,
@@ -1232,23 +1246,29 @@ relay_from_table(struct rsession *con)
idx = (int)arc4random_uniform(rlt->rlt_nhosts);
break;
case RELAY_DSTMODE_SRCHASH:
- case RELAY_DSTMODE_LOADBALANCE:
/* Source IP address without port */
p = relay_hash_addr(&con->se_in.ss, p);
- if (rlt->rlt_mode == RELAY_DSTMODE_SRCHASH)
- break;
- /* FALLTHROUGH */
+ idx = relay_hashring_lookup(p, table);
+ break;
+ case RELAY_DSTMODE_LOADBALANCE:
case RELAY_DSTMODE_HASH:
+ /* Source IP address without port */
+ p = relay_hash_addr(&con->se_in.ss, p);
/* Local "destination" IP address and port */
p = relay_hash_addr(&rlay->rl_conf.ss, p);
p = hash32_buf(&rlay->rl_conf.port,
sizeof(rlay->rl_conf.port), p);
+ idx = relay_hashring_lookup(p, table);
+ break;
+ case RELAY_DSTMODE_CONSISTHASH:
+ p = relay_hashring_hash(p);
+ idx = relay_hashring_lookup(p, table);
break;
default:
fatalx("relay_from_table: unsupported mode");
/* NOTREACHED */
}
- if (idx == -1 && (idx = p % rlt->rlt_nhosts) >= RELAY_MAXHOSTS)
+ if (idx == -1)
return (-1);
host = rlt->rlt_host[idx];
DPRINTF("%s: session %d: table %s host %s, p 0x%08x, idx %d",
@@ -1698,6 +1718,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg)
table->up--;
host->flags |= F_DISABLE;
host->up = HOST_UNKNOWN;
+ relay_hashring_update(table);
break;
case IMSG_HOST_ENABLE:
memcpy(&id, imsg->data, sizeof(id));
@@ -1714,6 +1735,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg)
table->up = 0;
TAILQ_FOREACH(host, &table->hosts, entry)
host->up = HOST_UNKNOWN;
+ relay_hashring_update(table);
break;
case IMSG_TABLE_ENABLE:
memcpy(&id, imsg->data, sizeof(id));
@@ -1723,6 +1745,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg)
table->up = 0;
TAILQ_FOREACH(host, &table->hosts, entry)
host->up = HOST_UNKNOWN;
+ relay_hashring_update(table);
break;
case IMSG_HOST_STATUS:
IMSG_SIZE_CHECK(imsg, &st);
@@ -1755,6 +1778,7 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg)
else
table->up--;
host->up = st.up;
+ relay_hashring_update(table);
break;
case IMSG_NATLOOK:
bcopy(imsg->data, &cnl, sizeof(cnl));
@@ -2649,6 +2673,111 @@ relay_load_certfiles(struct relay *rlay)
return (0);
}
+/* qsort(3) comparator: order host_ring slots by ascending ringkey. */
+static int
+relay_hashring_cmp(const void *aa, const void *bb)
+{
+	const struct host_ring *lhs = aa;
+	const struct host_ring *rhs = bb;
+
+	/* Compare instead of subtracting: the keys are unsigned. */
+	if (lhs->ringkey == rhs->ringkey)
+		return (0);
+
+	return (lhs->ringkey < rhs->ringkey ? -1 : 1);
+}
+
+/*
+ * Map a hash key to the closest slot on the table's sorted ring and
+ * return the idx of that slot's host.  Returns -1 when the table has no
+ * host up or the ring holds no slots.
+ */
+int
+relay_hashring_lookup(u_int32_t key, struct table *table)
+{
+	struct host_ring *ring = table->host_ring;
+	int last = table->nhosts - 1;
+	int n = last;
+
+	/* With an empty ring the scan below would start at host_ring[-1]. */
+	if (!table->up || last < 0)
+		return (-1);
+
+	/* Highest slot whose key is below ours; slot 0 when there is none. */
+	while (n > 0 && key <= ring[n].ringkey)
+		n--;
+
+	if (n == 0 && key < ring[0].ringkey) {
+		/* Before the first slot: first slot, or wrap to the last. */
+		if ((ring[0].ringkey - key) >
+		    ((UINT32_MAX - ring[last].ringkey) + key))
+			n = last;
+	} else if (n == last) {
+		/* At or past the last slot: stay, or wrap to the first. */
+		if ((ring[0].ringkey + (UINT32_MAX - key)) <=
+		    (key - ring[last].ringkey))
+			n = 0;
+	} else if ((ring[n + 1].ringkey - key) <= (key - ring[n].ringkey)) {
+		/* Between two slots: a tie goes to the upper one. */
+		n++;
+	}
+
+	return (ring[n].host->idx);
+}
+
+/*
+ * Rebuild the sorted ring of a consistent-hash table from its HOST_UP
+ * hosts; skipped when the table's up count matches the last rebuild.
+ */
+void
+relay_hashring_update(struct table *table)
+{
+	struct host_ring *slot;
+	struct host *h;
+	int count = 0;
+
+	if (table->conf.rlay_mode != RELAY_DSTMODE_CONSISTHASH ||
+	    table->up == table->lastup)
+		return;
+
+	table->lastup = table->up;
+	memset(table->host_ring, 0, sizeof(table->host_ring));
+	if (table->up == 0)
+		return;		/* ring stays zeroed, nhosts untouched */
+
+	TAILQ_FOREACH(h, &table->hosts, entry) {
+		if (h->up != HOST_UP)
+			continue;
+		slot = &table->host_ring[count++];
+		slot->host = h;
+		slot->ringkey = h->ringkey;
+	}
+
+	table->nhosts = count;
+	if (count > 0)		/* lookups walk the slots in key order */
+		qsort(table->host_ring, count, sizeof(*slot),
+		    relay_hashring_cmp);
+}
+
+/* Derive h->ringkey from rlt's key, the host's idx and its address. */
+void
+relay_hashring_assign(struct host *h, struct relay_table *rlt)
+{
+	h->ringkey = relay_hashring_hash(relay_hash_addr(&h->conf.ss,
+	    hash32_buf(&h->idx, sizeof(h->idx), rlt->rlt_key)));
+}
+
+/*
+ * Central hook for the ring's hash function of choice.  At present it
+ * runs hash32_buf() over the bytes of the input, using the input value
+ * itself as the initial hash.
+ */
+u_int32_t
+relay_hashring_hash(u_int32_t a)
+{
+	return (hash32_buf(&a, sizeof(a), a));
+}
+
int
relay_session_cmp(struct rsession *a, struct rsession *b)
{
diff --git a/relayd/relayd.h b/relayd/relayd.h
index 622f871..ed418a2 100644
--- a/relayd/relayd.h
+++ b/relayd/relayd.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: relayd.h,v 1.189 2014/07/14 00:11:12 bluhm Exp $ */
+/* $OpenBSD: relayd.h,v 1.191 2014/08/29 09:03:36 blambert Exp $ */
/*
* Copyright (c) 2006 - 2014 Reyk Floeter <reyk@openbsd.org>
@@ -395,11 +395,17 @@ struct host {
u_long up_cnt;
int retry_cnt;
int idx;
+ u_int32_t ringkey;
u_int16_t he;
struct ctl_tcp_event cte;
};
TAILQ_HEAD(hostlist, host);
+struct host_ring {
+ struct host *host; /* host occupying this ring slot */
+ u_int32_t ringkey; /* slot's key; the ring is sorted on it */
+};
+
enum host_error {
HCE_NONE = 0,
HCE_ABORT,
@@ -462,6 +468,8 @@ struct table_config {
char digest[41]; /* length of sha1 digest * 2 */
u_int8_t digest_type;
enum forwardmode fwdmode;
+ int rlay_mode;
+ u_int32_t hash_seed;
};
struct table {
@@ -470,6 +478,9 @@ struct table {
int up;
int skipped;
struct hostlist hosts;
+ struct host_ring host_ring[RELAY_MAXHOSTS];
+ int nhosts;
+ int lastup;
SSL_CTX *ssl_ctx;
char *sendbuf;
};
@@ -774,6 +785,7 @@ enum dstmode {
RELAY_DSTMODE_LOADBALANCE = 0,
RELAY_DSTMODE_ROUNDROBIN,
RELAY_DSTMODE_HASH,
+ RELAY_DSTMODE_CONSISTHASH,
RELAY_DSTMODE_SRCHASH,
RELAY_DSTMODE_LEASTSTATES,
RELAY_DSTMODE_RANDOM
@@ -976,6 +988,7 @@ struct privsep {
struct event ps_evsigchld;
struct event ps_evsighup;
struct event ps_evsigpipe;
+ struct event ps_evsigusr1;
int ps_noaction;
struct passwd *ps_pw;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment