Created
September 28, 2021 10:49
-
-
Save imcitius/ed36d90e9d0e378ee974c48db891c302 to your computer and use it in GitHub Desktop.
redis fix-split-brain.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
HOSTNAME="$(hostname)" | |
INDEX="${HOSTNAME##*-}" | |
SENTINEL_PORT=26379 | |
ANNOUNCE_IP='' | |
MASTER='' | |
MASTER_GROUP="redis" | |
QUORUM="2" | |
REDIS_CONF=/data/conf/redis.conf | |
REDIS_PORT=6379 | |
REDIS_TLS_PORT= | |
SENTINEL_CONF=/data/conf/sentinel.conf | |
SENTINEL_TLS_PORT= | |
SERVICE=redis-redis-ha | |
SENTINEL_TLS_REPLICATION_ENABLED=false | |
REDIS_TLS_REPLICATION_ENABLED=false | |
ROLE='' | |
REDIS_MASTER='' | |
set -eu | |
sentinel_get_master() { | |
set +e | |
if [ "$SENTINEL_PORT" -eq 0 ]; then | |
redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ | |
grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | |
else | |
redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel get-master-addr-by-name "${MASTER_GROUP}" |\ | |
grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' | |
fi | |
set -e | |
} | |
sentinel_get_master_retry() { | |
master='' | |
retry=${1} | |
sleep=3 | |
for i in $(seq 1 "${retry}"); do | |
master=$(sentinel_get_master) | |
if [ -n "${master}" ]; then | |
break | |
fi | |
sleep $((sleep + i)) | |
done | |
echo "${master}" | |
} | |
identify_master() { | |
echo "Identifying redis master (get-master-addr-by-name).." | |
echo " using sentinel (redis-redis-ha), sentinel group name (redis)" | |
echo " $(date).." | |
MASTER="$(sentinel_get_master_retry 3)" | |
if [ -n "${MASTER}" ]; then | |
echo " $(date) Found redis master (${MASTER})" | |
else | |
echo " $(date) Did not find redis master (${MASTER})" | |
fi | |
} | |
sentinel_update() { | |
echo "Updating sentinel config.." | |
echo " evaluating sentinel id (\${SENTINEL_ID_${INDEX}})" | |
eval MY_SENTINEL_ID="\$SENTINEL_ID_${INDEX}" | |
echo " sentinel id (${MY_SENTINEL_ID}), sentinel grp (${MASTER_GROUP}), quorum (${QUORUM})" | |
sed -i "1s/^/sentinel myid ${MY_SENTINEL_ID}\\n/" "${SENTINEL_CONF}" | |
if [ "$SENTINEL_TLS_REPLICATION_ENABLED" = true ]; then | |
echo " redis master (${1}:${REDIS_TLS_PORT})" | |
sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_TLS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" | |
else | |
echo " redis master (${1}:${REDIS_PORT})" | |
sed -i "2s/^/sentinel monitor ${MASTER_GROUP} ${1} ${REDIS_PORT} ${QUORUM} \\n/" "${SENTINEL_CONF}" | |
fi | |
echo "sentinel announce-ip ${ANNOUNCE_IP}" >> ${SENTINEL_CONF} | |
if [ "$SENTINEL_PORT" -eq 0 ]; then | |
echo " announce (${ANNOUNCE_IP}:${SENTINEL_TLS_PORT})" | |
echo "sentinel announce-port ${SENTINEL_TLS_PORT}" >> ${SENTINEL_CONF} | |
else | |
echo " announce (${ANNOUNCE_IP}:${SENTINEL_PORT})" | |
echo "sentinel announce-port ${SENTINEL_PORT}" >> ${SENTINEL_CONF} | |
fi | |
} | |
redis_update() { | |
echo "Updating redis config.." | |
if [ "$REDIS_TLS_REPLICATION_ENABLED" = true ]; then | |
echo " we are slave of redis master (${1}:${REDIS_TLS_PORT})" | |
echo "slaveof ${1} ${REDIS_TLS_PORT}" >> "${REDIS_CONF}" | |
echo "slave-announce-port ${REDIS_TLS_PORT}" >> ${REDIS_CONF} | |
else | |
echo " we are slave of redis master (${1}:${REDIS_PORT})" | |
echo "slaveof ${1} ${REDIS_PORT}" >> "${REDIS_CONF}" | |
echo "slave-announce-port ${REDIS_PORT}" >> ${REDIS_CONF} | |
fi | |
echo "slave-announce-ip ${ANNOUNCE_IP}" >> ${REDIS_CONF} | |
} | |
copy_config() { | |
echo "Copying default redis config.." | |
echo " to '${REDIS_CONF}'" | |
cp /readonly-config/redis.conf "${REDIS_CONF}" | |
echo "Copying default sentinel config.." | |
echo " to '${SENTINEL_CONF}'" | |
cp /readonly-config/sentinel.conf "${SENTINEL_CONF}" | |
} | |
setup_defaults() { | |
echo "Setting up defaults.." | |
echo " using statefulset index (${INDEX})" | |
if [ "${INDEX}" = "0" ]; then | |
echo "Setting this pod as master for redis and sentinel.." | |
echo " using announce (${ANNOUNCE_IP})" | |
redis_update "${ANNOUNCE_IP}" | |
sentinel_update "${ANNOUNCE_IP}" | |
echo " make sure ${ANNOUNCE_IP} is not a slave (slaveof no one)" | |
sed -i "s/^.*slaveof.*//" "${REDIS_CONF}" | |
else | |
echo "Getting redis master ip.." | |
echo " blindly assuming (${SERVICE}-announce-0) or (${SERVICE}-server-0) are master" | |
DEFAULT_MASTER="$(getent_hosts 0 | awk '{ print $1 }')" | |
echo " identified redis (may be redis master) ip (${DEFAULT_MASTER})" | |
if [ -z "${DEFAULT_MASTER}" ]; then | |
echo "Error: Unable to resolve redis master (getent hosts)." | |
exit 1 | |
fi | |
echo "Setting default slave config for redis and sentinel.." | |
echo " using master ip (${DEFAULT_MASTER})" | |
redis_update "${DEFAULT_MASTER}" | |
sentinel_update "${DEFAULT_MASTER}" | |
fi | |
} | |
redis_ping() { | |
set +e | |
if [ "$REDIS_PORT" -eq 0 ]; then | |
redis-cli -h "${MASTER}" -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key ping | |
else | |
redis-cli -h "${MASTER}" -p "${REDIS_PORT}" ping | |
fi | |
set -e | |
} | |
redis_ping_retry() { | |
ping='' | |
retry=${1} | |
sleep=3 | |
for i in $(seq 1 "${retry}"); do | |
if [ "$(redis_ping)" = "PONG" ]; then | |
ping='PONG' | |
break | |
fi | |
sleep $((sleep + i)) | |
MASTER=$(sentinel_get_master) | |
done | |
echo "${ping}" | |
} | |
find_master() { | |
echo "Verifying redis master.." | |
if [ "$REDIS_PORT" -eq 0 ]; then | |
echo " ping (${MASTER}:${REDIS_TLS_PORT})" | |
else | |
echo " ping (${MASTER}:${REDIS_PORT})" | |
fi | |
echo " $(date).." | |
if [ "$(redis_ping_retry 3)" != "PONG" ]; then | |
echo " $(date) Can't ping redis master (${MASTER})" | |
echo "Attempting to force failover (sentinel failover).." | |
if [ "$SENTINEL_PORT" -eq 0 ]; then | |
echo " on sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" | |
echo " $(date).." | |
if redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then | |
echo " $(date) Failover returned with 'NOGOODSLAVE'" | |
echo "Setting defaults for this pod.." | |
setup_defaults | |
return 0 | |
fi | |
else | |
echo " on sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" | |
echo " $(date).." | |
if redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" sentinel failover "${MASTER_GROUP}" | grep -q 'NOGOODSLAVE' ; then | |
echo " $(date) Failover returned with 'NOGOODSLAVE'" | |
echo "Setting defaults for this pod.." | |
setup_defaults | |
return 0 | |
fi | |
fi | |
echo "Hold on for 10sec" | |
sleep 10 | |
echo "We should get redis master's ip now. Asking (get-master-addr-by-name).." | |
if [ "$SENTINEL_PORT" -eq 0 ]; then | |
echo " sentinel (${SERVICE}:${SENTINEL_TLS_PORT}), sentinel grp (${MASTER_GROUP})" | |
else | |
echo " sentinel (${SERVICE}:${SENTINEL_PORT}), sentinel grp (${MASTER_GROUP})" | |
fi | |
echo " $(date).." | |
MASTER="$(sentinel_get_master)" | |
if [ "${MASTER}" ]; then | |
echo " $(date) Found redis master (${MASTER})" | |
echo "Updating redis and sentinel config.." | |
sentinel_update "${MASTER}" | |
redis_update "${MASTER}" | |
else | |
echo "$(date) Error: Could not failover, exiting..." | |
exit 1 | |
fi | |
else | |
echo " $(date) Found reachable redis master (${MASTER})" | |
echo "Updating redis and sentinel config.." | |
sentinel_update "${MASTER}" | |
redis_update "${MASTER}" | |
fi | |
} | |
redis_ro_update() { | |
echo "Updating read-only redis config.." | |
echo " redis.conf set 'replica-priority 0'" | |
echo "replica-priority 0" >> ${REDIS_CONF} | |
} | |
getent_hosts() { | |
index=${1:-${INDEX}} | |
service="${SERVICE}-announce-${index}" | |
pod="${SERVICE}-server-${index}" | |
host=$(getent hosts "${service}") | |
if [ -z "${host}" ]; then | |
host=$(getent hosts "${pod}") | |
fi | |
echo "${host}" | |
} | |
identify_announce_ip() { | |
echo "Identify announce ip for this pod.." | |
echo " using (${SERVICE}-announce-${INDEX}) or (${SERVICE}-server-${INDEX})" | |
ANNOUNCE_IP=$(getent_hosts | awk '{ print $1 }') | |
echo " identified announce (${ANNOUNCE_IP})" | |
} | |
redis_role() { | |
set +e | |
if [ "$REDIS_PORT" -eq 0 ]; then | |
ROLE=$(redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key info | grep role | sed 's/role://' | sed 's/\r//') | |
else | |
ROLE=$(redis-cli -p "${REDIS_PORT}" info | grep role | sed 's/role://' | sed 's/\r//') | |
fi | |
set -e | |
} | |
identify_redis_master() { | |
set +e | |
if [ "$REDIS_PORT" -eq 0 ]; then | |
REDIS_MASTER=$(redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key info | grep master_host | sed 's/master_host://' | sed 's/\r//') | |
else | |
REDIS_MASTER=$(redis-cli -p "${REDIS_PORT}" info | grep master_host | sed 's/master_host://' | sed 's/\r//') | |
fi | |
set -e | |
} | |
reinit() { | |
set +e | |
sh /readonly-config/init.sh | |
if [ "$REDIS_PORT" -eq 0 ]; then | |
echo "shutdown" | redis-cli -p "${REDIS_TLS_PORT}" --tls --cacert /tls-certs/ca.crt --cert /tls-certs/redis.crt --key /tls-certs/redis.key | |
else | |
echo "shutdown" | redis-cli -p "${REDIS_PORT}" | |
fi | |
set -e | |
} | |
identify_announce_ip | |
while true; do | |
sleep 60 | |
# where is redis master | |
identify_master | |
if [ "$MASTER" == "$ANNOUNCE_IP" ]; then | |
redis_role | |
if [ "$ROLE" != "master" ]; then | |
reinit | |
fi | |
else | |
identify_redis_master | |
if [ "$REDIS_MASTER" != "$MASTER" ]; then | |
reinit | |
fi | |
fi | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment