|
#!/bin/sh
# Switches the local Valkey instance between the master and replica roles.
# The target state is read from the third command-line argument.
# This script assumes there are two keepalived and Valkey instances.

# Abort on any unhandled command failure and on use of an unset variable.
set -e -u

# Command-line client used to talk to Valkey.
REDIS_CLI=valkey-cli
# Valkey port; used as the port argument of REPLICAOF.
REDIS_PORT='6379'

# Tag attached to every message handed to logger(1).
SYSLOG_TAG=keepalived-valkey

# Value of the TIMEOUT argument of the FAILOVER command: the maximum time in
# milliseconds a master will wait in the waiting-for-sync state before
# aborting the failover attempt.
FAILOVER_TIMEOUT='2000'

# Time in seconds to sleep while a failover is expected to complete. Used by
# the replica waiting for a failover initiated by the master, and by the
# master after it has issued FAILOVER.
FAILOVER_WAIT='8'
|
|
|
# Hands a message to logger(1) under the SYSLOG_TAG tag; the -s flag makes
# logger also print it to stderr.
#   $1 - priority level within the local0 facility (callers use info, warn, err)
#   $2 - message text
log() {
  local level="$1"
  local msg="$2"

  # The priority is quoted so it always stays a single argument, and `--`
  # keeps a message that begins with a dash from being parsed as an option.
  logger -s -t "$SYSLOG_TAG" -p "local0.$level" -- "$msg"
}
|
|
|
# Runs a Valkey command through the CLI against port REDIS_PORT, discarding
# the reply.
#   $@ - command name followed by its arguments
# Returns 0 on success. On failure logs the CLI's stderr output together with
# the command name ($1) at level err and returns 1.
redis_cmd() {
  local stderr

  # REDIS_CLI is expanded unquoted, as in the rest of the script.
  # `2>&1 >/dev/null` swaps the streams: stderr is captured, stdout is dropped.
  # -p keeps the CLI on the same port that REPLICAOF is given.
  # shellcheck disable=SC2086
  if ! stderr="$($REDIS_CLI -p "$REDIS_PORT" -e "$@" 2>&1 >/dev/null)"; then
    log err "Command $1 failed: $stderr"
    return 1
  fi
}
|
|
|
# Prints the value of one field from the output of INFO REPLICATION.
#   $1 - field name (e.g. role, master_link_status, connected_slaves)
# Prints nothing when the field is absent. The exit status is that of sed,
# so a failing CLI call shows up as empty output, not as an error status.
repl_info() {
  local name="$1"

  # The value is cut at the first whitespace character, which also drops a
  # trailing carriage return. The POSIX class [^[:space:]] replaces the
  # GNU-only \S escape. -p keeps the CLI on the port REPLICAOF is given.
  # shellcheck disable=SC2086
  $REDIS_CLI -p "$REDIS_PORT" -e INFO REPLICATION \
    | sed -En "s/^$name:([^[:space:]]*).*/\1/p"
}
|
|
|
# Returns 0 when the role reported by repl_info is exactly "master",
# and 1 otherwise (including when no role could be read).
is_master() {
  case "$(repl_info role)" in
    master) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
|
# Promotes the local instance to master.
#
# If the link to the current master is up, it first sleeps FAILOVER_WAIT
# seconds so a failover started by the master can complete. If the instance
# is still not master after that, it forces the promotion with
# REPLICAOF NO ONE.
#
# NOTE: When keepalived on the primary host is started and the secondary is
# MASTER, it enters the BACKUP mode first, so Valkey syncs data from the
# secondary node, then keepalived enters the MASTER mode and Valkey becomes
# the master.
replica_to_master() {
  if is_master; then
    log info "I'm already master, nothing to be done"
    return
  fi

  case "$(repl_info master_link_status)" in
    up)
      log info "Current master is up, waiting $FAILOVER_WAIT seconds for failover"
      sleep "$FAILOVER_WAIT"
      ;;
  esac

  # Not promoted by a failover: detach from the old master explicitly.
  if ! is_master; then
    log warn 'Failover failed, executing REPLICAOF NO ONE'
    redis_cmd REPLICAOF NO ONE
    return
  fi

  log info "Failover succeeded, I'm master now"
}
|
|
|
# Demotes the local instance to a replica of the given host.
#   $1 - host name of the instance that should become (or already is) master
#
# When at least one replica is connected, it first tries a FAILOVER (with
# TIMEOUT FAILOVER_TIMEOUT) and sleeps FAILOVER_WAIT seconds. If the instance
# is still master afterwards, or no replica was connected, it falls back to
# REPLICAOF <host> REDIS_PORT.
master_to_replica() {
  local master_host="$1"
  local replicas

  if ! is_master; then
    log info "I'm already replica, nothing to be done"
    return
  fi

  # An empty or non-numeric count (e.g. the INFO call produced nothing) is
  # treated as "no replicas"; otherwise `[ ... -gt 0 ]` would print an
  # "integer expression expected" error.
  replicas="$(repl_info connected_slaves)" || replicas=''
  case "$replicas" in
    '' | *[!0-9]*) replicas=0 ;;
  esac

  if [ "$replicas" -gt 0 ]; then
    log info 'Starting failover'
    # Only wait when the FAILOVER command itself was accepted.
    if redis_cmd FAILOVER TIMEOUT "$FAILOVER_TIMEOUT"; then
      log info "Waiting $FAILOVER_WAIT seconds"
      sleep "$FAILOVER_WAIT"
    fi

    if is_master; then
      log warn 'Failover failed'
    else
      log info "Failover succeeded, I'm replica now"
      return
    fi
  fi

  log info "Executing REPLICAOF $master_host $REDIS_PORT"
  redis_cmd REPLICAOF "$master_host" "$REDIS_PORT"
}
|
|
|
|
|
# Entry point. Only the third argument is read: the target state
# (MASTER, BACKUP or FAULT). Any other state falls through without action.
if [ $# -lt 3 ]; then
  log err "Expected 3 arguments, got $#"
  exit 1
fi

# Give up early when the local instance does not answer.
redis_cmd PING || exit 1

case "$3" in
  MASTER)
    replica_to_master
    ;;
  BACKUP | FAULT)
    hostname="$(hostname -s)"
    domain="$(hostname -d)"

    # The peer's name is the local short hostname with the -01/-02 suffix
    # swapped.
    case "$hostname" in
      *-01) other_host="${hostname%-*}-02" ;;
      *-02) other_host="${hostname%-*}-01" ;;
      *)
        # Without this arm other_host stays unset and `set -u` aborts the
        # script with an unlogged "parameter not set" error.
        log err "Cannot determine peer host: hostname '$hostname' does not end with -01 or -02"
        exit 1
        ;;
    esac

    # Append the domain only when there is one, so an empty domain does not
    # leave a trailing dot on the peer name.
    other_host="$other_host${domain:+.$domain}"

    master_to_replica "$other_host"
    ;;
esac