Skip to content

Instantly share code, notes, and snippets.

@fvigotti
Created September 13, 2018 07:21
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fvigotti/c30c8fb57df8a24ea6bd7a62e89d860b to your computer and use it in GitHub Desktop.
Save fvigotti/c30c8fb57df8a24ea6bd7a62e89d860b to your computer and use it in GitHub Desktop.
graceful docker/kubernetes shutdown sample
[Unit]
Description=Docker Application Container Engine
Documentation=http://docs.docker.com
After=network.target docker.socket
Wants=docker.socket
[Service]
Type=notify
Environment=GOTRACEBACK=crash
ExecReload=/bin/kill -s HUP $MAINPID
Delegate=yes
KillMode=process
TimeoutStopSec=1200
# User root is required because kubectl is configured only for the root user; otherwise the ExecStop/ExecStartPost scripts fail with a kubectl error.
User=root
Group=root
ExecStop=/usr/bin/stopdocker_kube.sh
ExecStartPost=/usr/bin/startdocker_kube.sh
ExecStart=/usr/bin/docker/dockerd
TasksMax=infinity
LimitNOFILE=1048576
LimitNPROC=1048576
LimitCORE=infinity
# Increase TimeoutStartSec because the ExecStartPost script must uncordon the node, which can take a while.
TimeoutStartSec=8min
#Restart=on-abnormal
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
#!/usr/bin/env bash
# Start-side companion of the docker shutdown hook: meant to run around
# docker start so that a node cordoned before docker was stopped can be
# uncordoned again.

# Node name defaults to this machine's hostname unless NODENAME is preset.
HOSTNAME=$(hostname)
: "${NODENAME:=$HOSTNAME}"

# Marker file whose presence means the node was cordoned at shutdown.
CORDON_LOCKFILE=/etc/_docker-kube-shutdown
# Tag used by the logging helper.
PRGRAM_NAME=startdocker
# Upper bound on the number of uncordon attempts.
_WAIT_ATTEMPTS=20
export CORDON_LOCKFILE PRGRAM_NAME _WAIT_ATTEMPTS
#######################################
# Log a message to stdout and to the system log (via logger, user.notice).
# Globals:   PRGRAM_NAME (read) - used, together with the shell PID, as tag
# Arguments: $@ - message words; joined with single spaces on stdout
# Outputs:   "<PRGRAM_NAME>[<pid>] <message>" on stdout
# Returns:   exit status of logger
#######################################
syslog() {
  # Join words with a space regardless of the caller's IFS.
  local IFS=' '
  local tag="${PRGRAM_NAME}[$$]"
  local line="$tag"
  if [ "$#" -gt 0 ]; then
    line+=" $*"
  fi
  printf '%s\n' "$line"
  # "--" ends option parsing so a message starting with "-" is not
  # mistaken for a logger option.
  logger -p user.notice -t "$tag" -- "$@"
}
# Remove the kubelet container; if removal fails (the message assumes the
# container is still running) just log it and carry on.
docker rm kubelet || syslog "kubelet already running, not removed"

# Only uncordon when the marker file exists, i.e. the node was cordoned by
# the shutdown hook. Use the shared CORDON_LOCKFILE variable rather than a
# second hard-coded copy of the path.
if [ -f "$CORDON_LOCKFILE" ]; then
  attempt=1
  uncordoned=0
  while [ "$attempt" -le "$_WAIT_ATTEMPTS" ]; do
    if kubectl uncordon "${NODENAME}"; then
      # Success: drop the marker so the next start does not uncordon again.
      rm -f "$CORDON_LOCKFILE"
      syslog "node $NODENAME uncordoned! in $attempt attempts"
      uncordoned=1
      break
    fi
    syslog "failed to uncordon node, attempt $attempt/$_WAIT_ATTEMPTS"
    # Pause between attempts only; sleeping after the last one just delays
    # the caller for no benefit.
    if [ "$attempt" -lt "$_WAIT_ATTEMPTS" ]; then
      sleep 15
    fi
    attempt=$((attempt + 1))
  done
  if [ "$uncordoned" -ne 1 ]; then
    # Best effort, as before: report the failure but do not change the
    # script's exit status; the marker file is kept for a later retry.
    syslog "ERROR node $NODENAME still cordoned after $_WAIT_ATTEMPTS attempts" || true
  fi
fi
#!/usr/bin/env bash
## Intended to be configured in systemd as `ExecStop=`.
## Improved version of stopdocker: the node is drained before docker goes
## down so that pods get a graceful shutdown; the node has to be made
## schedulable again once docker has been restarted.

# Node name defaults to this machine's hostname unless NODENAME is preset.
HOSTNAME=$(hostname)
: "${NODENAME:=$HOSTNAME}"
# Tag used by the logging helper.
PRGRAM_NAME=stopdocker
# Marker file recording that the node was cordoned at shutdown.
CORDON_LOCKFILE=/etc/_docker-kube-shutdown
export NODENAME PRGRAM_NAME CORDON_LOCKFILE
#######################################
# Log a message to stdout and to the system log (via logger, user.notice).
# Globals:   PRGRAM_NAME (read) - used, together with the shell PID, as tag
# Arguments: $@ - message words; joined with single spaces on stdout
# Outputs:   "<PRGRAM_NAME>[<pid>] <message>" on stdout
# Returns:   exit status of logger
#######################################
syslog() {
  # Join words with a space regardless of the caller's IFS.
  local IFS=' '
  local tag="${PRGRAM_NAME}[$$]"
  local line="$tag"
  if [ "$#" -gt 0 ]; then
    line+=" $*"
  fi
  printf '%s\n' "$line"
  # "--" ends option parsing so a message starting with "-" is not
  # mistaken for a logger option.
  logger -p user.notice -t "$tag" -- "$@"
}
# add taint to shutdown pods ( seems not necessary yet.. )
#kubectl taint nodes k8s-04 operations=shutdown:NoExecute
#kubectl taint nodes k8s-04 operations:NoExecute-
#######################################
# List kube-system pods on this node that are shown as "Terminating".
# Globals:   NODENAME (read) - node whose pods are inspected
# Arguments: none
# Outputs:   matching `kubectl get pods -o wide` lines on stdout
# Returns:   exit status of the last grep (non-zero when nothing matches)
# NOTE(review): only the kube-system namespace is inspected — confirm that
# pods in other namespaces are intentionally ignored.
#######################################
get_terminating_pods(){
  # -F: node name is a literal, not a regex (dots must not match anything);
  # -w: whole-word match so "k8s-1" does not also select "k8s-10".
  kubectl -n kube-system get pods -o wide \
    | grep -F -w -- "${NODENAME}" \
    | grep -F -- "Terminating"
}
# Leave a marker file so that the start-side script knows the node was
# cordoned during shutdown and has to be uncordoned.
touch "$CORDON_LOCKFILE"

# Evict the pods from this node (kubectl waits at most 10 minutes). A failed
# or timed-out drain is reported but does not abort the stop sequence.
kubectl drain --ignore-daemonsets --delete-local-data --force --timeout 10m "${NODENAME}"
drain_status=$?
if [ "$drain_status" -ne 0 ]; then
  syslog "ERROR kubectl drain of node $NODENAME exited with status $drain_status"
fi

# Short pause before checking which pods are still shown as Terminating.
sleep 2
TERMINATION_FAILURE="$(get_terminating_pods)"
if [ -n "$TERMINATION_FAILURE" ]; then
  syslog "ERROR not all pods has been evicted! $TERMINATION_FAILURE"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment