Testing an OpenShift nested-libvirt cluster by setting the time ahead 370 days (the script below defaults to a +400d skew unless one is passed as the first argument).
#!/bin/bash
set -euxo pipefail
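# Time-skew test for a nested-libvirt OpenShift cluster: stop NTP everywhere,
# jump every node's clock (and the hypervisor's) forward by $SKEW, then walk
# the cluster back to health. A sketch of the expected environment, inferred
# from what the script does rather than stated anywhere in the gist: it runs
# on the libvirt hypervisor host, with `oc` logged in as a cluster admin, SSH
# access as core@<node> to every node, and a 5-node cluster (the node count
# is hardcoded below).
#
# Usage (the script filename here is hypothetical):
#   ./skew-test.sh            # default skew of +400d
#   ./skew-test.sh +370d      # explicit skew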
final-check () {
  if ! oc wait co --all --for='condition=Available=True' --timeout=20s 1>/dev/null || \
     ! oc wait co --all --for='condition=Progressing=False' --timeout=20s 1>/dev/null || \
     ! oc wait co --all --for='condition=Degraded=False' --timeout=20s 1>/dev/null; then
    echo "Some ClusterOperators Degraded=True, Progressing=True, or Available=False"
    oc get co
    exit 1
  else
    echo "All ClusterOperators reporting healthy"
    oc get co
    oc get clusterversion
  fi
  exit 0
}
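# Run final-check on every exit path: with `set -e` any failed command aborts
# the script, and this trap still reports cluster health before it dies.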
trap final-check EXIT
approveCSRs () {
  pendingCSRs=$(oc get csr | grep Pending | wc -l)
  if [ "$pendingCSRs" -ne 0 ]; then
    echo "Approving pending csrs"
    # approve all outstanding CSRs in one shot so rebooted nodes can rejoin
    oc get csr -o name | xargs oc adm certificate approve
    sleep 30
  fi
}
# TODO: Need to improve this nodesReady check
checkNodesReady () {
  nodesReady=0
  retries=0
  while [ "$nodesReady" -ne 5 ] && [ "$retries" -lt 100 ]; do
    approveCSRs
    # `oc wait` prints one line per node that reached Ready; 5 is the
    # expected node count for this cluster
    nodesReady=$(oc wait --for=condition=Ready node --all --timeout=30s | wc -l)
    if [ "$nodesReady" -eq 5 ]; then
      echo "All nodes Ready"
    fi
    (( retries++ ))
  done
  if [ "$nodesReady" -ne 5 ]; then
    echo "Some nodes NotReady"
    oc get nodes
    exit 1
  fi
}
jumpstartNodes () {
  approveCSRs
  # jumpstart any stuck nodes; during recovery, nodes will be rebooted
  nodesDisabled=$(oc get nodes | grep "NotReady" | awk '{ print $1 }')
  if [ -n "${nodesDisabled}" ]; then
    nodeDisabledList=( $nodesDisabled )
    for i in "${nodeDisabledList[@]}"; do
      # assumes the libvirt domain names match the Kubernetes node names
      echo "Restarting stuck node ${i}..."
      sudo virsh destroy "${i}"
      sleep 30
      sudo virsh start "${i}"
      sleep 60
    done
    checkNodesReady
  fi
}
checkDegradedCOs () {
  retries=0
  # The image-pruner job in the openshift-image-registry namespace may be stuck
  # due to the time skew; this would not happen if time were progressing
  # naturally. Kill the image-pruner jobs here.
  oc delete jobs --all -n openshift-image-registry
  # Supposedly fixed, but a lingering pod can still trip up the insights
  # operator: https://bugzilla.redhat.com/show_bug.cgi?id=1919778
  oc delete pods --all -n openshift-insights --force --grace-period=0
  sleep 10
  while ! oc wait co --all --for='condition=Degraded=False' --timeout=20s && [ "$retries" -lt 100 ]; do
    (( retries++ ))
  done
}
checkProgressingCOs () {
  retries=0
  # The image-pruner job in the openshift-image-registry namespace may be stuck
  # due to the time skew; this would not happen if time were progressing
  # naturally. Kill the image-pruner jobs here.
  oc delete jobs --all -n openshift-image-registry
  sleep 10
  while ! oc wait co --all --for='condition=Progressing=False' --timeout=20s && [ "$retries" -lt 100 ]; do
    jumpstartNodes
    (( retries++ ))
  done
}
checkAvailableCOs () {
  retries=0
  while ! oc wait co --all --for='condition=Available=True' --timeout=20s && [ "$retries" -lt 100 ]; do
    jumpstartNodes
    (( retries++ ))
  done
}
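# ---- main ----
# From here on: stop NTP, skew every clock by the same delta, restart the
# kubelets, then use the helpers above to approve CSRs and nudge nodes and
# operators until the cluster reports healthy again.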
# stop NTP on the hypervisor host so the skew set below is not corrected
sudo systemctl stop chronyd
SKEW=${1:-+400d}
OC=${OC:-oc}
SSH=${SSH:-ssh}
control_nodes=$( ${OC} get nodes --selector='node-role.kubernetes.io/master' --template='{{ range $index, $_ := .items }}{{ range .status.addresses }}{{ if (eq .type "InternalIP") }}{{ if $index }} {{end }}{{ .address }}{{ end }}{{ end }}{{ end }}' )
compute_nodes=$( ${OC} get nodes --selector='!node-role.kubernetes.io/master' --template='{{ range $index, $_ := .items }}{{ range .status.addresses }}{{ if (eq .type "InternalIP") }}{{ if $index }} {{end }}{{ .address }}{{ end }}{{ end }}{{ end }}' )
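# The templates above collect each node's InternalIP into a space-separated
# list, e.g. (hypothetical addresses for a nested-libvirt install):
#   control_nodes="192.168.126.11 192.168.126.12 192.168.126.13"
#   compute_nodes="192.168.126.51 192.168.126.52"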
function run-on {
  # run a command (arg 2) via sudo bash on every node in the list (arg 1)
  for n in ${1}; do ${SSH} core@"${n}" sudo 'bash -eEuxo pipefail' <<< "${2}"; done
}
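# usage sketch: run-on "${control_nodes}" "timedatectl status"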
ssh-keyscan -H ${control_nodes} ${compute_nodes} >> ~/.ssh/known_hosts
run-on "${control_nodes} ${compute_nodes}" "systemctl stop kubelet"
# Destroy all containers on compute_nodes.
run-on "${compute_nodes}" "crictl rm --all -f"
# Destroy all containers on control_nodes except KAS and etcd.
run-on "${control_nodes}" ' | |
kas_id=$( crictl ps --name="^kube-apiserver$" -q ) | |
# [[ -n "${kas_id}" ]] | |
etcd_id=$( crictl ps --name="^etcd$" -q ) | |
# [[ -n "${etcd_id}" ]] | |
other_ids=$( crictl ps --all -q | ( grep -v -e "${kas_id}" -e "${etcd_id}" || true ) ) | |
if [ -n "${other_ids}" ]; then | |
crictl rm -f ${other_ids} | |
fi; | |
' | |
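# kube-apiserver and etcd are left running so the `oc` calls below still have
# an API server to talk to while everything else restarts.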
# Delete all pods, especially the operators. This makes sure the
# kube-controller-manager and kube-scheduler have to be working when
# everything starts again.
${OC} delete pods --all -n openshift-kube-apiserver-operator --force --grace-period=0
${OC} delete pods --all -n openshift-kube-apiserver --force --grace-period=0
${OC} delete pods --all -n openshift-etcd-operator --force --grace-period=0
${OC} delete pods --all -n openshift-etcd --force --grace-period=0
${OC} delete pods -A --all --force --grace-period=0
# Delete all clusteroperator status to avoid stale status when the operator pod isn't started.
export bearer=$( oc -n openshift-cluster-version serviceaccounts get-token default )
export server=$( oc whoami --show-server )
for co in $( oc get co --template='{{ range .items }}{{ printf "%s\n" .metadata.name }}{{ end }}' ); do
  curl -k -X PATCH \
    -H "Authorization: Bearer ${bearer}" \
    -H "Accept: application/json" \
    -H "Content-Type: application/merge-patch+json" \
    ${server}/apis/config.openshift.io/v1/clusteroperators/${co}/status \
    -d '{"status": null}' && echo
done
# Destroy the remaining containers on control_nodes
run-on "${control_nodes}" "crictl rm --all -f"
run-on "${control_nodes} ${compute_nodes}" "systemctl disable chronyd --now"
# Set the time only as a difference to the synced time, so we don't introduce
# a skew between the machines, which would break etcd, leader election, and others.
run-on "${control_nodes} ${compute_nodes}" "
timedatectl status
timedatectl set-ntp false
timedatectl set-time '${SKEW}'
timedatectl status
"
run-on "${control_nodes} ${compute_nodes}" "sleep 10 && systemctl start kubelet" | |
# now set date for host | |
sudo timedatectl set-time ${SKEW} | |
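# Optional sanity check: the host and every node should now agree on a time
# roughly ${SKEW} ahead of real time, e.g.:
#   date; run-on "${control_nodes} ${compute_nodes}" "date"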
# wait for connectivity
# allow ~4 minutes (25 tries x 10s) for the date to propagate and for the API to regain connectivity
set +e
retries=0
while ! oc get csr && [ "$retries" -lt 25 ]; do
  if [ "$retries" -eq 24 ]; then
    exit 1
  fi
  sleep 10
  (( retries++ ))
done
# the check functions tolerate transient failures, so drop errexit/nounset
set +eu
checkNodesReady
checkAvailableCOs
checkProgressingCOs
checkDegradedCOs
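# Note: the script deliberately leaves chronyd disabled and the clocks skewed;
# re-enable NTP (systemctl enable chronyd --now) once the test run is finished.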