Skip to content

Instantly share code, notes, and snippets.

@matti
Created November 13, 2022 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matti/ba79c211c24a845aea8a216eb198caef to your computer and use it in GitHub Desktop.
Save matti/ba79c211c24a845aea8a216eb198caef to your computer and use it in GitHub Desktop.
# Watchdog: periodically look for pods that have restarted too many times and
# drain the node hosting the first such pod, one node at a time.
#
# Optional environment overrides (defaults match the original hard-coded values):
#   DRAIN_NAMESPACE          namespace whose pods are inspected     (default: default)
#   DRAIN_RESTART_THRESHOLD  a pod counts as failed when its restart
#                            count is greater than this number      (default: 1)
#   DRAIN_POLL_SECONDS       pause between checks                   (default: 20)
#   DRAIN_RETRY_SECONDS      pause between drain attempts           (default: 1)
#
# Strict mode (set -e) is deliberately not enabled: this is a long-running loop
# that must survive transient kubectl failures and simply try again.
drain_namespace="${DRAIN_NAMESPACE:-default}"
drain_restart_threshold="${DRAIN_RESTART_THRESHOLD:-1}"
drain_poll_seconds="${DRAIN_POLL_SECONDS:-20}"
drain_retry_seconds="${DRAIN_RETRY_SECONDS:-1}"

# Run one check: find failing pods, pick the node of the first one that is
# scheduled, and drain that node unless some node is already unschedulable.
# Outputs: progress messages on stdout, failures on stderr.
# Returns: 0 when the check ran, 1 when the pod listing could not be obtained.
drain_one_failing_node() {
  local pod_listing failed_pods failed_pod pod_node
  local drain_candidate=""

  # Run kubectl on its own (not piped into awk) so that an API failure is
  # reported as such instead of being mistaken for "no failed pods".
  if ! pod_listing=$(kubectl get pods -n "$drain_namespace" --no-headers); then
    echo "$(date) failed to list pods in namespace '$drain_namespace'" >&2
    return 1
  fi

  # The 4th column of kubectl's default pod listing is the RESTARTS count.
  failed_pods=$(awk -v max="$drain_restart_threshold" \
    '$4 + 0 > max + 0 {print $1}' <<<"$pod_listing")
  if [[ -z "$failed_pods" ]]; then
    echo "$(date) no failed pods"
    return 0
  fi

  # Intentional word splitting: awk emits one pod name per line, and pod names
  # cannot contain whitespace or glob characters.
  for failed_pod in $failed_pods; do
    echo "$(date) pod failed too many times: '$failed_pod'"
    # Only one node is drained per check, so stop querying once we have one.
    [[ -n "$drain_candidate" ]] && continue
    # nodeName is empty for a pod that has not been scheduled yet; skip those.
    if pod_node=$(kubectl get pod -n "$drain_namespace" "$failed_pod" \
      -o jsonpath='{.spec.nodeName}') && [[ -n "$pod_node" ]]; then
      drain_candidate=$pod_node
    fi
  done

  # A node carrying the unschedulable taint means a drain is already under way.
  # -F matches the taint text literally; matching lines are left in the output.
  if kubectl get nodes \
    -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints --no-headers \
    | grep -F "effect:NoSchedule key:node.kubernetes.io/unschedulable"; then
    echo "$(date) already one node(s) draining"
    return 0
  fi

  [[ -n "$drain_candidate" ]] || return 0

  # Keep draining until the node can no longer be fetched (kubectl get fails),
  # retrying after every attempt regardless of whether the drain succeeded.
  while kubectl get node "$drain_candidate"; do
    echo "$(date) node drain: '$drain_candidate'"
    kubectl drain "$drain_candidate" \
      --delete-emptydir-data --ignore-daemonsets --force \
      || echo "$(date) node drain failed, will retry: '$drain_candidate'" >&2
    sleep "$drain_retry_seconds"
  done
  return 0
}

while true; do
  drain_one_failing_node
  sleep "$drain_poll_seconds"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment