DominicWatson/Dockerfile

## Dockerfile
# Image for the autoscaler drain helper: an Ubuntu base that carries the
# kubectl binary plus the job.sh / k8sdrain.sh scripts.
#
# NOTE(review): both base images are untagged and therefore resolve to
# :latest; pin a tested tag or digest to make builds reproducible.

# Donor stage: only used as the source of the kubectl binary.
FROM bitnami/kubectl AS kubectl

# Runtime stage.
FROM ubuntu

COPY --from=kubectl /opt/bitnami/kubectl/bin/kubectl /usr/local/bin/
COPY scripts/* /usr/bin/
# Both scripts must be executable: job.sh is the CMD and it invokes k8sdrain.sh.
RUN chmod +x /usr/bin/k8sdrain.sh /usr/bin/job.sh

CMD [ "/usr/bin/job.sh" ]

## job.sh
#!/bin/bash

# Hacktastic method to query the k8s API and trigger the auto "safe drain" script from here:
# https://gist.github.com/juliohm1978/fcfd21b26f9431c01978
# We:
# * Parse autoscaler logs to detect nodes it wants to drain in the last 5 mins
# * Slim these down to nodes that do not have critical blockers
# * Slim these down to nodes that are blocked due to PDBs (PodDisruptionBudgets)
# * Check they are not already cordoned
# * Run the k8sdrain.sh script to cordon the node and perform rolling restarts on all deployments & replicasets affected
#
# At most one node is drained per run: the script exits right after the first drain.
#
# This is quite specific to our needs, but hopefully offers someone a starting point to implement something that suits them

AUTOSCALER_POD=$( kubectl get pods -n kube-system -l app="cluster-autoscaler" -o custom-columns=:metadata.name --no-headers )
if [[ -z "$AUTOSCALER_POD" ]] ; then
  echo "No cluster-autoscaler pod found in kube-system, nothing to do." >&2
  exit 0
fi

# Only the five most recent "... for removal" log lines are considered.
CANDIDATES="$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i " for removal\$" | tail -n 5 )"

# Cut-off as a time of day; compared as a string with field 2 of each log line.
# NOTE(review): a pure time-of-day comparison misses candidates around midnight and
# assumes the log timestamps use the same timezone as this container - confirm.
SINCE=$( date '+%H:%M:%S.%N' -d "5 minutes ago" )

# Split $CANDIDATES on newlines only, so that each log line is one loop item.
IFS=$'\n'
for CANDIDATE in $CANDIDATES
do
  NODE=$( echo "$CANDIDATE" | awk '{print $5}' )
  TS=$( echo "$CANDIDATE" | awk '{print $2}' )

  # Without a node name the greps below would match every log line.
  [[ -n "$NODE" ]] || continue

  # Ignore candidates logged before the cut-off.
  [[ "$TS" > "$SINCE" ]] || continue

  # Numeric tests (-eq / -gt) are required from here on: "$X==0" written without
  # spaces is a single non-empty word, which [[ ]] always evaluates as true.
  CRITICALPROBS=$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i "$NODE" | grep -c "cannot be removed: non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present" )
  [[ "$CRITICALPROBS" -eq 0 ]] || continue

  PDBPROBS=$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i "$NODE" | grep -c "cannot be removed: not enough pod disruption budget to move" )
  [[ "$PDBPROBS" -gt 0 ]] || continue

  # NODEINFO is empty when the node cannot be fetched; CORDONED counts the
  # "SchedulingDisabled" marker in the node's status line.
  NODEINFO=$( kubectl get node "$NODE" | grep "$NODE" )
  CORDONED=$( echo "$NODEINFO" | grep -ci "SchedulingDisabled" )
  if [[ -n "$NODEINFO" && "$CORDONED" -eq 0 ]] ; then
    echo "Found a node for safe draining. Draining single node: $NODE..."

    # where /usr/bin/k8sdrain.sh is the gist from here: https://gist.github.com/juliohm1978/fcfd21b26f9431c01978
    /usr/bin/k8sdrain.sh "$NODE" restart
    exit 0
  fi
done

## kubernetes-manifests.yaml
# Cluster-wide permissions for the drain helper job.
# job.sh reads pods, pod logs and nodes through kubectl; the write verbs are
# presumably needed by k8sdrain.sh for cordoning and rolling restarts - verify
# against that script before narrowing them.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: autoscaler-drain-helper-role
rules:
  - apiGroups: ["", "extensions", "apps"]
    resources:
      - nodes
      - endpoints
      - pods
      - pods/log
      - deployments
      - statefulsets
    verbs: ["get", "list", "watch", "update", "patch"]
---
# Identity the drain helper pods run as; the CronJob's pod spec references it
# by name via serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: autoscaler-drain-helper
  # The namespace itself is not created by these manifests.
  namespace: autoscaler-drain-helper
---
# Grants the autoscaler-drain-helper-role ClusterRole to the
# autoscaler-drain-helper ServiceAccount.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: autoscaler-drain-helper-api-access
# Who receives the permissions.
subjects:
  - kind: ServiceAccount
    name: autoscaler-drain-helper
    namespace: autoscaler-drain-helper
# Which role is granted.
roleRef:
  kind: ClusterRole
  name: autoscaler-drain-helper-role
  apiGroup: rbac.authorization.k8s.io
---
# Runs the drain helper image every 15 minutes under the
# autoscaler-drain-helper ServiceAccount.
# $CI_* variables and {{SECRETS}} are placeholders that must be substituted
# before this manifest is applied.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: autoscaler-drain-helper
  namespace: autoscaler-drain-helper
spec:
  schedule: "*/15 * * * *"
  # A drain may take longer than the schedule interval; skip a run while the
  # previous one is still active so only one node is drained at a time.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: autoscaler-drain-helper
          annotations:
            build_id: "$CI_JOB_ID"
        spec:
          serviceAccountName: autoscaler-drain-helper
          imagePullSecrets:
            - name: gitlab-registry
          # A failed container is not restarted in place.
          restartPolicy: Never
          containers:
          - name: autoscaler-drain-helper-container
            env:
              {{SECRETS}}
            image: $CI_REGISTRY_IMAGE:$CI_REGISTRY_TAG
            # ^^ this bit specific to our Gitlab setup, but basically the built image from the Dockerfile above
	# Image for the autoscaler drain helper: an Ubuntu base that carries the
	# kubectl binary plus the job.sh / k8sdrain.sh scripts.
	#
	# NOTE(review): both base images are untagged and therefore resolve to
	# :latest; pin a tested tag or digest to make builds reproducible.

	# Donor stage: only used as the source of the kubectl binary.
	FROM bitnami/kubectl AS kubectl

	# Runtime stage.
	FROM ubuntu

	COPY --from=kubectl /opt/bitnami/kubectl/bin/kubectl /usr/local/bin/
	COPY scripts/* /usr/bin/
	# Both scripts must be executable: job.sh is the CMD and it invokes k8sdrain.sh.
	RUN chmod +x /usr/bin/k8sdrain.sh /usr/bin/job.sh

	CMD [ "/usr/bin/job.sh" ]
	#!/bin/bash

	# Hacktastic method to query the k8s API and trigger the auto "safe drain" script from here:
	# https://gist.github.com/juliohm1978/fcfd21b26f9431c01978
	# We:
	# * Parse autoscaler logs to detect nodes it wants to drain in the last 5 mins
	# * Slim these down to nodes that do not have critical blockers
	# * Slim these down to nodes that are blocked due to PDBs (PodDisruptionBudgets)
	# * Check they are not already cordoned
	# * Run the k8sdrain.sh script to cordon the node and perform rolling restarts on all deployments & replicasets affected
	#
	# At most one node is drained per run: the script exits right after the first drain.
	#
	# This is quite specific to our needs, but hopefully offers someone a starting point to implement something that suits them

	AUTOSCALER_POD=$( kubectl get pods -n kube-system -l app="cluster-autoscaler" -o custom-columns=:metadata.name --no-headers )
	if [[ -z "$AUTOSCALER_POD" ]] ; then
	  echo "No cluster-autoscaler pod found in kube-system, nothing to do." >&2
	  exit 0
	fi

	# Only the five most recent "... for removal" log lines are considered.
	CANDIDATES="$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i " for removal\$" | tail -n 5 )"

	# Cut-off as a time of day; compared as a string with field 2 of each log line.
	# NOTE(review): a pure time-of-day comparison misses candidates around midnight and
	# assumes the log timestamps use the same timezone as this container - confirm.
	SINCE=$( date '+%H:%M:%S.%N' -d "5 minutes ago" )

	# Split $CANDIDATES on newlines only, so that each log line is one loop item.
	IFS=$'\n'
	for CANDIDATE in $CANDIDATES
	do
	  NODE=$( echo "$CANDIDATE" | awk '{print $5}' )
	  TS=$( echo "$CANDIDATE" | awk '{print $2}' )

	  # Without a node name the greps below would match every log line.
	  [[ -n "$NODE" ]] || continue

	  # Ignore candidates logged before the cut-off.
	  [[ "$TS" > "$SINCE" ]] || continue

	  # Numeric tests (-eq / -gt) are required from here on: "$X==0" written without
	  # spaces is a single non-empty word, which [[ ]] always evaluates as true.
	  CRITICALPROBS=$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i "$NODE" | grep -c "cannot be removed: non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present" )
	  [[ "$CRITICALPROBS" -eq 0 ]] || continue

	  PDBPROBS=$( kubectl logs -n kube-system "$AUTOSCALER_POD" | grep -i "$NODE" | grep -c "cannot be removed: not enough pod disruption budget to move" )
	  [[ "$PDBPROBS" -gt 0 ]] || continue

	  # NODEINFO is empty when the node cannot be fetched; CORDONED counts the
	  # "SchedulingDisabled" marker in the node's status line.
	  NODEINFO=$( kubectl get node "$NODE" | grep "$NODE" )
	  CORDONED=$( echo "$NODEINFO" | grep -ci "SchedulingDisabled" )
	  if [[ -n "$NODEINFO" && "$CORDONED" -eq 0 ]] ; then
	    echo "Found a node for safe draining. Draining single node: $NODE..."

	    # where /usr/bin/k8sdrain.sh is the gist from here: https://gist.github.com/juliohm1978/fcfd21b26f9431c01978
	    /usr/bin/k8sdrain.sh "$NODE" restart
	    exit 0
	  fi
	done
	# Cluster-wide permissions for the drain helper job.
	# job.sh reads pods, pod logs and nodes through kubectl; the write verbs are
	# presumably needed by k8sdrain.sh for cordoning and rolling restarts - verify.
	kind: ClusterRole
	apiVersion: rbac.authorization.k8s.io/v1
	metadata:
	  name: autoscaler-drain-helper-role
	rules:
	- apiGroups:
	  - ""
	  - "extensions"
	  - "apps"
	  resources:
	  - nodes
	  - endpoints
	  - pods
	  - pods/log
	  - deployments
	  - statefulsets
	  verbs:
	  - get
	  - list
	  - watch
	  - update
	  - patch
	---
	# Identity the drain helper pods run as.
	apiVersion: v1
	kind: ServiceAccount
	metadata:
	  name: autoscaler-drain-helper
	  namespace: autoscaler-drain-helper
	---
	# Grants the ClusterRole above to the ServiceAccount above.
	apiVersion: rbac.authorization.k8s.io/v1
	kind: ClusterRoleBinding
	metadata:
	  name: autoscaler-drain-helper-api-access
	roleRef:
	  apiGroup: rbac.authorization.k8s.io
	  kind: ClusterRole
	  name: autoscaler-drain-helper-role
	subjects:
	- kind: ServiceAccount
	  name: autoscaler-drain-helper
	  namespace: autoscaler-drain-helper
	---
	# Runs the drain helper image every 15 minutes.
	# $CI_* variables and {{SECRETS}} are placeholders that must be substituted
	# before this manifest is applied.
	apiVersion: batch/v1
	kind: CronJob
	metadata:
	  name: autoscaler-drain-helper
	  namespace: autoscaler-drain-helper
	spec:
	  schedule: "*/15 * * * *"
	  # A drain may take longer than the schedule interval; skip a run while the
	  # previous one is still active so only one node is drained at a time.
	  concurrencyPolicy: Forbid
	  jobTemplate:
	    spec:
	      template:
	        metadata:
	          labels:
	            app: autoscaler-drain-helper
	          annotations:
	            build_id: "$CI_JOB_ID"
	        spec:
	          serviceAccountName: autoscaler-drain-helper
	          imagePullSecrets:
	            - name: gitlab-registry
	          restartPolicy: Never
	          containers:
	          - name: autoscaler-drain-helper-container
	            env:
	              {{SECRETS}}
	            image: $CI_REGISTRY_IMAGE:$CI_REGISTRY_TAG
	            # ^^ this bit specific to our Gitlab setup, but basically the built image from the Dockerfile above