Created
August 12, 2020 05:34
-
-
Save jfreeland/e937248a298e7d8e506b92b6bbc8ebad to your computer and use it in GitHub Desktop.
this is just an example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This script is intended to be used to (slowly) cycle all of the nodes in an
# aws_autoscaling_group.
#

# Abort as soon as any command exits non-zero.
set -e

# Kubernetes context to act on. Cleared here so that only a value supplied
# with -c is ever used, never one inherited from the environment.
unset CONTEXT

# When true, kubectl commands are printed instead of executed (-d).
DRY_RUN=false

# Seconds to sleep between node operations (-t).
PAUSE_TIME=300
#######################################
# Print the help text describing the script's purpose and its flags.
# Arguments: none
# Outputs:   writes the help text to stdout
#######################################
usage() {
  # The -t wording says "at least 60" because the option parser accepts
  # exactly 60 and only rejects values below it.
  printf '%s\n' \
    "This script is intended to be used to drain nodes in an aws_autoscaling_group" \
    "and ultimately force the replacement of those nodes." \
    "" \
    "-d : Dry run. Print, do not execute, commands." \
    "-c : The cluster you're performing the action on." \
    " This is used to set the kubernetes context as this is not an exported variable." \
    "-t : Time in seconds to pause between node operations. Default: 300" \
    " Pause time must be at least 60 seconds."
}
#######################################
# Parse the command-line flags.
# Globals:   CONTEXT (written by -c), PAUSE_TIME (written by -t),
#            DRY_RUN (written by -d)
# Arguments: the script's positional parameters
# Outputs:   error messages (and usage text) on stdout for invalid input
# Returns:   0 on success; exits the shell with status 1 on a non-numeric
#            or too-small pause time, a missing option argument, or an
#            unknown option
#######################################
parse_args() {
  local opt OPTARG
  local OPTIND=1
  local -r re_isanum='^[0-9]+$'

  # The leading ':' puts getopts in silent mode: a missing argument is
  # reported as opt=':' and an unknown flag as opt='?'.
  while getopts ":c:t:d" opt; do
    case "${opt}" in
      c)
        CONTEXT=$OPTARG
        # TODO: Could add some check to validate this is a valid kubectx.
        ;;
      t)
        if ! [[ $OPTARG =~ $re_isanum ]]; then
          echo "Error: Pause time must be a number."
          exit 1
        fi
        # Force base 10 so input with leading zeros (e.g. 090) is not
        # interpreted as octal by the arithmetic comparison.
        PAUSE_TIME=$((10#$OPTARG))
        if ((PAUSE_TIME < 60)); then
          echo "Error: Must wait at least 60 seconds between node operations."
          exit 1
        fi
        ;;
      d)
        DRY_RUN=true
        ;;
      :)
        echo "Error: Option -$OPTARG requires an argument."
        usage
        exit 1
        ;;
      \?)
        echo "Error: Unknown option."
        usage
        # An unrecognised flag is an error, so report failure to the caller.
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# A context is mandatory: without it there is no cluster to act on, so print
# the error and help text and stop.
if [[ -z "${CONTEXT:-}" ]]; then
  echo "Error: Must provide a cluster to perform action on."
  echo ""
  usage
  exit 1
fi
# Build the kubectl command prefix used by label() and drain(). In dry-run
# mode the prefix starts with "echo", so those commands are printed rather
# than executed, and the pause is shortened to 2 seconds.
# Compare DRY_RUN as a string instead of executing its value as a command.
if [[ "$DRY_RUN" == true ]]; then
  echo "Dry run, only printing commands:"
  PAUSE_TIME=2
  KUBECTL_CMD="echo kubectl --context $CONTEXT"
else
  KUBECTL_CMD="kubectl --context $CONTEXT"
fi
# Prefix for log lines, formatted as YYYY/MM/DD HH:MM:SS. The date field uses
# %m (month); %M is minutes and only belongs in the time field.
# Captured once here, so every message in a run carries this same value.
timestamp=$(date "+%Y/%m/%d %H:%M:%S")
#######################################
# Mark a node as awaiting replacement by labeling it pending_action=replace.
# Globals:   KUBECTL_CMD (read)
# Arguments: $1 - node to label, as passed to "kubectl label"
# Returns:   exit status of the kubectl (or echo, in dry-run) command
#######################################
label() {
  local node=$1
  # KUBECTL_CMD holds a command plus its arguments in one string, so it must
  # stay unquoted to be split into words; the node name is quoted.
  # shellcheck disable=SC2086
  $KUBECTL_CMD label "$node" pending_action=replace
}
#######################################
# Drain a node, then label it drained=true.
# Globals:   KUBECTL_CMD (read)
# Arguments: $1 - node to drain, as passed to "kubectl drain"
# Returns:   exit status of the last kubectl (or echo, in dry-run) command
#######################################
drain() {
  local node=$1
  # KUBECTL_CMD holds a command plus its arguments in one string, so it must
  # stay unquoted to be split into words; the node name is quoted.
  # NOTE(review): newer kubectl releases rename --delete-local-data to
  # --delete-emptydir-data; confirm against the kubectl version in use.
  # shellcheck disable=SC2086
  $KUBECTL_CMD drain "$node" --ignore-daemonsets --delete-local-data
  # shellcheck disable=SC2086
  $KUBECTL_CMD label "$node" drained=true
}
# If there are nodes that already have a label that indicates that they should
# be replaced, replace those nodes and exit. You could run into this case if the
# script loses connectivity to your cluster or exits for whatever reason before
# finishing a full replacement.
#
# This query calls kubectl directly rather than through KUBECTL_CMD, so it
# runs for real even in dry-run mode.
pending_replacement=$(kubectl --context "$CONTEXT" get nodes \
  -l 'pending_action=replace,drained!=true' \
  -o name --no-headers --ignore-not-found)

if [[ -n "$pending_replacement" ]]; then
  echo "[$timestamp] Pause time between actions: $PAUSE_TIME"
  echo "[$timestamp] Replacing previously labeled nodes:"
  # Left unquoted on purpose: prints the node list on a single line.
  # shellcheck disable=SC2086
  echo $pending_replacement
  # Left unquoted on purpose: word splitting yields one node name per
  # iteration.
  # shellcheck disable=SC2086
  for node in $pending_replacement; do
    echo "[$timestamp] Draining node $node"
    drain "$node"
    echo "[$timestamp] Sleeping for $PAUSE_TIME"
    sleep "$PAUSE_TIME"
  done
  echo "[$timestamp] Complete. Nodes previously labeled replace have been drained."
  exit 0
fi
# If no nodes were already labeled to be replaced, we'll first label all nodes
# and then we'll cycle and delete them one by one with a pause time.
echo "[$timestamp] Did not find any previously labeled nodes with pending_action=replace"

# This query calls kubectl directly rather than through KUBECTL_CMD, so it
# runs for real even in dry-run mode.
all_nodes=$(kubectl --context "$CONTEXT" get nodes -o name --no-headers)

# Nothing to do without nodes: report the error and stop.
if [[ -z "$all_nodes" ]]; then
  echo "[$timestamp] Error: Did not find any nodes to label."
  exit 1
fi

echo "[$timestamp] Labeled and replacing all nodes."

# First pass: label every node before any of them is drained.
# $all_nodes is left unquoted on purpose so word splitting yields one node
# name per iteration.
# shellcheck disable=SC2086
for node in $all_nodes; do
  echo "[$timestamp] Labeling node: $node"
  label "$node"
done
echo "[$timestamp] Complete. All nodes have been labeled pending_action=replace"

# Second pass: drain the nodes one at a time, pausing after each.
echo "[$timestamp] Starting to replace nodes with $PAUSE_TIME seconds between node drains."
# shellcheck disable=SC2086
for node in $all_nodes; do
  echo "[$timestamp] Draining node: $node"
  drain "$node"
  echo "[$timestamp] Sleeping for $PAUSE_TIME"
  sleep "$PAUSE_TIME"
done
echo "[$timestamp] Complete. All nodes have been drained."

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment