Skip to content

Instantly share code, notes, and snippets.

@robskillington
Last active July 2, 2019 00:00
Show Gist options
  • Save robskillington/8de62dbe631a1c5b81b6401204d96d77 to your computer and use it in GitHub Desktop.
Save robskillington/8de62dbe631a1c5b81b6401204d96d77 to your computer and use it in GitHub Desktop.
m3db-nodes-remove.sh
#!/bin/bash
#
# Removes the hosts listed in HOSTS from an M3DB placement, one at a time.
# Required environment: HOSTS, ISOLATION_GROUP, KV_ZONE, KV_ENV,
# COORD_HTTP_HOST, COORD_HTTP_PORT.

# The usage banner is printed on every run, including successful ones.
echo "Example usage: KV_ZONE=embedded KV_ENV=default_env COORD_HTTP_HOST=\"host1\" COORD_HTTP_PORT=\"7201\" HOSTS=\"host1 host2 host3\" ISOLATION_GROUP=\"group\" ./m3dbops-nodes-remove.sh"

# Stop at the first required variable that is unset or empty. The order of the
# list decides which variable is reported when several are missing.
for required_var in HOSTS ISOLATION_GROUP KV_ZONE KV_ENV COORD_HTTP_HOST COORD_HTTP_PORT; do
  # ${!name} is bash indirect expansion: the value of the variable named by $name.
  if [ -z "${!required_var}" ]; then
    echo "must set ${required_var}"
    exit 1
  fi
done
unset required_var
# Can override the node RPC port with DBNODE_RPC_PORT (default 9000).
# (Previously this read DBNODE_HTTP_PORT, so overriding the HTTP port silently
# changed the RPC port as well and DBNODE_RPC_PORT was ignored.)
NODE_RPC_PORT=${DBNODE_RPC_PORT:-9000}
# Can override the node HTTP port with DBNODE_HTTP_PORT (default 9002).
NODE_HTTP_PORT=${DBNODE_HTTP_PORT:-9002}
# The per-node health check runs in child shells started with `bash -c`
# (via xargs); those only see exported variables, so export the ports.
export NODE_RPC_PORT NODE_HTTP_PORT
# Use following file for placement locally
placement_file="placement-${KV_ENV}-${KV_ZONE}.json"
# From here on: abort on the first failing command (errexit) and trace each
# command as it is executed (xtrace).
set -o errexit -o xtrace
# Verbose log lines are appended to a fresh temporary file. The path is
# exported so child processes can read it from their environment.
LOGFILE="$(mktemp)"
export LOGFILE
echo "Logging verbose output: ${LOGFILE}"
# Appends one timestamped line to the file named by LOGFILE.
# Arguments: the message words; they are joined into a single line.
# Outputs:   nothing on stdout; the line goes to ${LOGFILE} only.
log() {
  local stamp
  stamp="$(date --rfc-3339=seconds)"
  printf '[%s]: %s\n' "${stamp}" "$*" >> "${LOGFILE}"
}
# Exported so that shells started with `bash -c` can call it.
export -f log
# Prints a timestamped message on stdout and also records it through log().
# Arguments: the message words; they are joined into a single line.
log_stdout() {
  local message="$*"
  printf '[%s]: %s\n' "$(date --rfc-3339=seconds)" "${message}"
  # Mirror the message into the log so the log reads as a complete timeline.
  log "${message}"
}
export -f log_stdout
#######################################
# Checks whether a single M3DB node reports itself as bootstrapped.
# Globals:
#   NODE_HTTP_PORT   (read) port of the node's /health endpoint
#   DBNODE_HTTP_PORT (read) used when NODE_HTTP_PORT is unset; defaults to 9002
# Arguments:
#   $1 - host name of the node
# Outputs:
#   "OK" on stdout when /health answers with bootstrapped == true, otherwise
#   "BAD"; details are recorded through log().
# Returns:
#   1 when no host name was given, 0 in every other case (including "BAD").
#######################################
health_check_m3dbnode() {
  local host=${1:-}
  # This function is run in child shells started with `bash -c`, which only
  # see exported variables. Resolve the port here so that a missing
  # NODE_HTTP_PORT does not produce the URL "http://host:/health".
  local port=${NODE_HTTP_PORT:-${DBNODE_HTTP_PORT:-9002}}
  local health bootstrapped

  if [[ -z "${host}" ]]; then
    log "invalid hostname: ${host}"
    echo "BAD"
    # return instead of exit: ends a `bash -c` child or a command substitution
    # with the same status, without killing a shell that calls this directly.
    return 1
  fi

  # A failed request is reported as "BAD" below, not treated as a script error.
  health=$(curl -s "http://${host}:${port}/health" 2>/dev/null) || true
  if [[ -z "${health}" ]]; then
    log "${host}: not responding to health check"
    echo "BAD"
    return 0
  fi

  # An unparsable body leaves this empty or non-"true", which is reported as "BAD".
  bootstrapped=$(jq .bootstrapped <<<"${health}") || true
  if [[ "${bootstrapped}" != "true" ]]; then
    log "${host}: health check responding not bootstrapped"
    echo "BAD"
    return 0
  fi

  log "${host}: health check responding bootstrapped"
  echo "OK"
}
export -f health_check_m3dbnode
#######################################
# Downloads the current m3db placement from the coordinator into the local
# placement file, replacing its previous contents.
# Globals:
#   KV_ZONE, KV_ENV                  (read) sent as Cluster-Zone-Name and
#                                    Cluster-Environment-Name headers
#   COORD_HTTP_HOST, COORD_HTTP_PORT (read) coordinator address
#   placement_file                   (read) destination path; when unset,
#                                    "placement-${KV_ENV}-${KV_ZONE}.json"
# Returns:
#   curl's exit status. curl is run without --fail, so an HTTP error response
#   is still written to the file.
#######################################
get_placement_file() {
  local out_file=${placement_file:-placement-${KV_ENV}-${KV_ZONE}.json}
  curl -s \
    -H "Cluster-Zone-Name: ${KV_ZONE}" \
    -H "Cluster-Environment-Name: ${KV_ENV}" \
    "http://${COORD_HTTP_HOST}:${COORD_HTTP_PORT}/api/v1/services/m3db/placement" \
    > "${out_file}"
}
# Export the function that is actually defined here. The previous
# `export -f placement` named a non-existent function; that command fails, and
# with `set -e` in effect it terminated the script at this line.
export -f get_placement_file
# Prints, one per line, sorted and without duplicates, every instance key in a
# freshly downloaded placement together with every host named in HOSTS.
all_hosts() {
  get_placement_file
  {
    # Instance keys of the placement that was just written to disk.
    jq -r '.placement.instances | keys[]' "${placement_file}"
    # HOSTS is deliberately unquoted: word splitting yields one host per line.
    printf '%s\n' ${HOSTS}
  } | sort | uniq
}
export -f all_hosts
#######################################
# Reports whether the cluster is healthy enough to continue.
# Globals:
#   MIN_CLUSTER_HOSTS (read) minimum plausible number of hosts; default 10
#   placement_file    (read) local placement JSON; when unset,
#                     "placement-${KV_ENV}-${KV_ZONE}.json"
# Outputs:
#   "OK" on stdout when every shard in the placement is AVAILABLE and every
#   host passes health_check_m3dbnode; otherwise "BAD". Details go to log().
# Returns:
#   1 when fewer than MIN_CLUSTER_HOSTS hosts are known (a sanity failure that
#   callers should treat as fatal); 0 otherwise, including for a retryable
#   "BAD".
#######################################
health_check_cluster() {
  local min_hosts=${MIN_CLUSTER_HOSTS:-10}
  local placement=${placement_file:-placement-${KV_ENV}-${KV_ZONE}.json}
  local hosts num_cluster_hosts shards_avail num_ok

  # Resolve the host list once so the count and the per-node checks below
  # operate on exactly the same set of hosts.
  hosts=$(all_hosts) || true

  # paranoia
  # grep -c exits 1 on a zero count while still printing "0".
  num_cluster_hosts=$(grep -c . <<<"${hosts}") || true
  if [ "${num_cluster_hosts}" -lt "${min_hosts}" ]; then
    log "invalid num cluster hosts: ${num_cluster_hosts}"
    echo "BAD"
    # Non-zero status (not just "BAD") so a caller can tell this apart from a
    # cluster that is merely still recovering.
    return 1
  fi

  # If it's not the case that all shards are available, don't continue.
  get_placement_file
  # The instances live under ".placement", the same path all_hosts reads;
  # reading ".instances" at the top level can never produce "true".
  shards_avail=$(jq '.placement.instances | to_entries | map(.value.shards | map(.state)) | flatten | unique == ["AVAILABLE"]' < "${placement}") || shards_avail=""
  if [[ "${shards_avail}" != "true" ]]; then
    log "observed non-AVAILABLE shard states"
    echo "BAD"
    return 0
  fi

  # Check up to 30 nodes in parallel. The host is passed as a positional
  # argument instead of being spliced into the `bash -c` command text, so a
  # name containing shell metacharacters is never executed as code.
  num_ok=$(printf '%s\n' "${hosts}" \
    | xargs -I{} -P 30 bash -c 'health_check_m3dbnode "$1"' _ {} \
    | grep -cx OK) || true
  if [ "${num_ok}" -eq "${num_cluster_hosts}" ]; then
    log "all ${num_cluster_hosts} returned healthy status"
    echo "OK"
    return 0
  fi

  log "${num_ok} of ${num_cluster_hosts} are healthy, waiting till all return healthy"
  echo "BAD"
}
export -f health_check_cluster
# Blocks until health_check_cluster prints "OK", polling every 30 seconds.
# If health_check_cluster itself exits non-zero (its sanity check failed), the
# script is aborted explicitly rather than relying on `set -e`.
wait_for_healthy_cluster() {
  local status
  while true; do
    status=$(health_check_cluster) || {
      log_stdout "cluster health check failed its sanity check, aborting"
      exit 1
    }
    if [ "${status}" = "OK" ]; then
      return 0
    fi
    sleep 30
  done
}

# Pause after each removal before moving on to the next host.
SLEEP_SEC=600

# HOSTS is deliberately unquoted: word splitting yields one host per iteration.
for host in ${HOSTS}; do
  log_stdout "###################"
  log_stdout "removing $host"
  log_stdout "ensuring cluster is healthy before removing"
  wait_for_healthy_cluster

  log_stdout "removing $host from placement"
  # Send the same zone/environment headers as the placement GET so that the
  # removal applies to the placement that is being health-checked.
  curl -X DELETE \
    -H "Cluster-Zone-Name: ${KV_ZONE}" \
    -H "Cluster-Environment-Name: ${KV_ENV}" \
    "http://${COORD_HTTP_HOST}:${COORD_HTTP_PORT}/api/v1/services/m3db/placement/${host}"

  log_stdout "ensuring cluster is healthy before continuing"
  wait_for_healthy_cluster

  log_stdout "$host removed, and cluster is back to being healthy."
  log_stdout "sleeping for ${SLEEP_SEC}s before continuing"
  sleep "${SLEEP_SEC}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment