Skip to content

Instantly share code, notes, and snippets.

@jfreeland
Created August 12, 2020 05:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jfreeland/e937248a298e7d8e506b92b6bbc8ebad to your computer and use it in GitHub Desktop.
Save jfreeland/e937248a298e7d8e506b92b6bbc8ebad to your computer and use it in GitHub Desktop.
this is just an example
#!/bin/bash
#
# This script is intended to be used to (slowly) cycle all of the nodes in an
# aws_autoscaling_group.
#
# Abort the whole run as soon as any command fails.
set -o errexit

# Defaults. PAUSE_TIME (-t) and DRY_RUN (-d) can be overridden by flags.
PAUSE_TIME=300
DRY_RUN=false

# CONTEXT has no default: drop any inherited value so it can only come from -c.
unset CONTEXT
#######################################
# Print the help text describing the script and its flags.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  # The minimum pause is stated as "at least 60" because a value of exactly
  # 60 is accepted by the -t validation.
  cat <<'EOF'
This script is intended to be used to drain nodes in an aws_autoscaling_group
and ultimately force the replacement of those nodes.

-d : Dry run. Print, do not execute, commands.
-c : The cluster you're performing the action on.
 This is used to set the kubernetes context as this is not an exported variable.
-t : Time in seconds to pause between node operations. Default: 300
 Pause time must be at least 60 seconds.
EOF
}
#######################################
# Parse the command-line flags into the script's global settings.
# Globals:   CONTEXT (written by -c), PAUSE_TIME (written by -t),
#            DRY_RUN (written by -d)
# Arguments: the script's positional parameters
# Outputs:   error messages (plus usage for malformed flags) on stderr
# Returns:   0 when every flag is valid; exits 1 otherwise
#######################################
parse_args() {
  local opt
  local OPTIND=1
  local -r re_isanum='^[0-9]+$'

  # Leading ':' selects silent error reporting: a missing argument arrives as
  # opt=':' and an unknown flag as opt='?', both with the flag in OPTARG.
  while getopts ":c:t:d" opt; do
    case "${opt}" in
      c)
        CONTEXT=$OPTARG
        # TODO: Could add some check to validate this is a valid kubectx.
        ;;
      t)
        if ! [[ $OPTARG =~ $re_isanum ]]; then
          echo "Error: Pause time must be a number." >&2
          exit 1
        fi
        # Force base 10. Without it a value such as "08" is rejected as
        # invalid octal by the arithmetic comparison, the comparison evaluates
        # as false, and the 60-second minimum is silently skipped.
        PAUSE_TIME=$((10#$OPTARG))
        if ((PAUSE_TIME < 60)); then
          echo "Error: Must wait at least 60 seconds between node operations." >&2
          exit 1
        fi
        ;;
      d)
        DRY_RUN=true
        ;;
      :)
        # -c or -t given without a value.
        echo "Error: Option -$OPTARG requires an argument." >&2
        usage >&2
        exit 1
        ;;
      \?)
        echo "Error: Unknown option." >&2
        usage >&2
        # A rejected invocation must not report success.
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# A cluster is mandatory: every kubectl call below is issued with
# --context "$CONTEXT". Report the problem on stderr and stop.
if [[ -z "${CONTEXT:-}" ]]; then
  {
    echo "Error: Must provide a cluster to perform action on."
    echo ""
    usage
  } >&2
  exit 1
fi
# Base command used by the mutating helpers (label/drain).
KUBECTL_CMD="kubectl --context $CONTEXT"

# In a dry run the command is prefixed with echo so it is printed instead of
# executed, and the pause between nodes is shortened to 2 seconds.
if $DRY_RUN; then
  echo "Dry run, only printing commands:"
  PAUSE_TIME=2
  KUBECTL_CMD="echo $KUBECTL_CMD"
fi
# Log prefix in year/month/day hour:minute:second form. %m is the month;
# %M (minutes) belongs only in the time part. Captured once here, so every
# log line that uses $timestamp shows this same value.
timestamp=$(date "+%Y/%m/%d %H:%M:%S")
#######################################
# Label a node pending_action=replace.
# Globals:   KUBECTL_CMD (read)
# Arguments: $1 - node to label, passed through to the command unchanged
# Outputs:   whatever the command held in KUBECTL_CMD prints
# Returns:   exit status of that command
#######################################
label() {
  local node=$1
  # KUBECTL_CMD holds a space-separated command line and must word-split;
  # the node argument is quoted so it is never split or glob-expanded.
  # shellcheck disable=SC2086
  $KUBECTL_CMD label "$node" pending_action=replace
}
#######################################
# Drain a node, then label it drained=true.
# Globals:   KUBECTL_CMD (read)
# Arguments: $1 - node to drain, passed through to the command unchanged
# Outputs:   whatever the command held in KUBECTL_CMD prints
# Returns:   non-zero as soon as the drain or the label step fails
#######################################
drain() {
  local node=$1
  # KUBECTL_CMD holds a space-separated command line and must word-split;
  # the node argument is quoted so it is never split or glob-expanded.
  # NOTE(review): newer kubectl releases deprecate --delete-local-data in
  # favour of --delete-emptydir-data; confirm against the kubectl in use.
  # Stop here on failure so a node is never labeled drained=true unless the
  # drain itself succeeded, even where errexit is not in effect.
  # shellcheck disable=SC2086
  $KUBECTL_CMD drain "$node" --ignore-daemonsets --delete-local-data || return
  # shellcheck disable=SC2086
  $KUBECTL_CMD label "$node" drained=true
}
# Resume path. If some nodes already carry pending_action=replace but are not
# yet labeled drained=true, drain just those nodes and exit. You could run
# into this case if the script loses connectivity to your cluster or exits for
# whatever reason before finishing a full replacement.
#
# The lookup always calls kubectl directly (not KUBECTL_CMD) so that a dry run
# still sees the real node list.
pending_replacement=$(
  kubectl --context "$CONTEXT" get nodes \
    -l 'pending_action=replace,drained!=true' \
    -o name --no-headers --ignore-not-found
)
if [[ -n "$pending_replacement" ]]; then
  echo "[$timestamp] Pause time between actions: $PAUSE_TIME"
  echo "[$timestamp] Replacing previously labeled nodes:"
  # Left unquoted on purpose: word splitting puts all names on one line.
  # shellcheck disable=SC2086
  echo $pending_replacement
  # Intentional word splitting: one node name per whitespace-separated word.
  for node in $pending_replacement; do
    echo "[$timestamp] Draining node $node"
    drain "$node"
    echo "[$timestamp] Sleeping for $PAUSE_TIME"
    sleep "$PAUSE_TIME"
  done
  echo "[$timestamp] Complete. Nodes previously labeled replace have been drained."
  exit 0
fi
# Full-cycle path. If no nodes were already labeled to be replaced, first label
# every node, then drain them one by one with a pause after each drain.
# Labeling everything up front is what lets an interrupted run be resumed
# from the pending_action=replace label.
echo "[$timestamp] Did not find any previously labeled nodes with pending_action=replace"
all_nodes=$(kubectl --context "$CONTEXT" get nodes -o name --no-headers)

# Nothing to do is treated as an error and reported on stderr.
if [[ -z "$all_nodes" ]]; then
  echo "[$timestamp] Error: Did not find any nodes to label." >&2
  exit 1
fi

echo "[$timestamp] Labeled and replacing all nodes."
# Intentional word splitting: one node name per whitespace-separated word.
for node in $all_nodes; do
  echo "[$timestamp] Labeling node: $node"
  label "$node"
done
echo "[$timestamp] Complete. All nodes have been labeled pending_action=replace"

echo "[$timestamp] Starting to replace nodes with $PAUSE_TIME seconds between node drains."
for node in $all_nodes; do
  echo "[$timestamp] Draining node: $node"
  drain "$node"
  echo "[$timestamp] Sleeping for $PAUSE_TIME"
  sleep "$PAUSE_TIME"
done
echo "[$timestamp] Complete. All nodes have been drained."
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment