Skip to content

Instantly share code, notes, and snippets.

@cfergeau
Last active January 13, 2020 08:05
Show Gist options
  • Save cfergeau/fc4ac5379a120dad1381e8ba1fc4789d to your computer and use it in GitHub Desktop.
Save cfergeau/fc4ac5379a120dad1381e8ba1fc4789d to your computer and use it in GitHub Desktop.

Dealing with expired certificates

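  • Start the crc instance with the expired certificates (the VM must be running, even if the cluster itself is non-functional)
  • Download both scripts (fix-certs.sh and regenerate-certs.sh) into the same directory on the machine running the crc instance
  • Run fix-certs.sh from that directory; if the pull secret is not already present on the VM, set PULL_SECRET_FILE to the path of your pull secret file first
  • Once the script completes successfully, the crc instance should become operational after a few minutes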

Note: This script has been tested on Linux and macOS.

#!/bin/bash
# Trace every command as it is executed.
set -o xtrace

# ssh command line reused for every remote call. It stays a single string
# because it is also handed to `rsync -e`. Host key checking is switched off
# and the known-hosts file is /dev/null. The tilde sits inside quotes, so it
# is stored literally in the variable.
SSH="ssh"
SSH+=" -o StrictHostKeyChecking=no"
SSH+=" -o UserKnownHostsFile=/dev/null"
SSH+=" -i ~/.crc/machines/crc/id_rsa"

# Address of the crc VM, as reported by `crc ip`.
HOST="$(crc ip)"
#######################################
# Runs a command on the crc VM over ssh as the "core" user.
# Globals:   SSH (read), HOST (read)
# Arguments: the remote command and its arguments (at least one)
# Returns:   the exit status of the ssh invocation; exits the whole script
#            with status 1 when called without arguments
#######################################
function run_remote_command {
  if [ $# -lt 1 ]; then
    echo "Missing command" >&2
    exit 1
  fi
  # SSH holds a complete command line and has to be word-split, so it stays
  # unquoted. "$@" is quoted so the caller's arguments are neither re-split
  # nor glob-expanded on the local machine before ssh sees them.
  # shellcheck disable=SC2086
  ${SSH} "core@${HOST}" "$@"
}
#######################################
# Copies a local script to the crc VM, runs it there with `sudo sh`, and
# removes the remote copy when the script succeeded.
# Arguments: $1 - path of the local script
# Returns:   1 when the copy fails, otherwise the status of the remote
#            command; exits the whole script with status 1 unless called
#            with exactly one argument
#######################################
function run_remote_script {
  if [ $# -ne 1 ]; then
    echo "Missing script or too many arguments" >&2
    exit 1
  fi
  local script_name
  script_name=$(basename -- "$1")
  if ! copy_file "$1" "${script_name}"; then
    # A bare `return` here would hand back the status of the negated
    # condition above, i.e. 0, and the caller would report success.
    return 1
  fi
  run_remote_command "sudo sh ${script_name} && rm ${script_name}"
}
#######################################
# Transfers one local file to the crc VM using rsync over ssh.
# Globals:   SSH (read), HOST (read)
# Arguments: $1 - local source path
#            $2 - destination path on the VM (for the "core" user)
# Returns:   rsync's exit status; exits the whole script with status 1
#            unless exactly two arguments are given
#######################################
copy_file() {
  [[ $# -eq 2 ]] || {
    echo "Wrong number of arguments"
    exit 1
  }
  local local_path=$1
  local remote_target="core@${HOST}:$2"
  rsync -avP -e "${SSH}" "${local_path}" "${remote_target}"
}
# TODO: check the bundle version (must be a 4.2 bundle)
# TODO: check the certificate expiry date

# The ssh key only exists once the VM has been created by a first start.
if [ ! -f ~/.crc/machines/crc/id_rsa ]; then
  echo "crc machine was never started, this script cannot currently recover it" >&2
  exit 1
fi

# regenerate-certs.sh reads registry credentials from
# /var/lib/kubelet/config.json on the VM; upload the pull secret when that
# file is not there yet.
if ! run_remote_command 'stat /var/lib/kubelet/config.json'; then
  # Quoted so that an empty value and a path containing spaces are both
  # tested correctly.
  if [ -z "${PULL_SECRET_FILE:-}" ]; then
    echo "Pull secret is missing, you need to set PULL_SECRET_FILE in your environment before running this script" >&2
    exit 1
  fi
  if ! copy_file "${PULL_SECRET_FILE}" config.json; then
    echo "Failed to copy pull secret" >&2
    exit 1
  fi
  # Without the pull secret in place the later steps cannot work, so a
  # failed move is fatal.
  if ! run_remote_command "sudo mv config.json /var/lib/kubelet/"; then
    echo "Failed to move pull secret to /var/lib/kubelet/" >&2
    exit 1
  fi
fi

# if kubelet is already running, this will be a noop
run_remote_command 'sudo systemctl start kubelet'

if run_remote_script regenerate-certs.sh; then
  echo "Cluster certificates were successfully regenerated, the cluster now needs several minutes to settle"
  # TODO: check that the cluster is ready
else
  echo "Error trying to regenerate cluster certificates" >&2
  exit 1
fi
#!/bin/bash
# Regenerates expired control plane certificates from a temporary recovery
# API server. The script writes below /etc/kubernetes and /var/lib/kubelet and
# restarts kubelet, so it has to run with root privileges on the cluster node.
#
# This script follows https://docs.openshift.com/container-platform/4.1/backup_and_restore/disaster_recovery/scenario-3-expired-certs.html
# https://github.com/openshift/openshift-docs/blob/master/modules/dr-recover-expired-control-plane-certs.adoc
set -x

export RELEASE_IMAGE="quay.io/openshift-release-dev/ocp-release:4.2.2"

# Pull spec of the cluster-kube-apiserver-operator image of that release.
KAO_IMAGE=$( oc adm release info --registry-config='/var/lib/kubelet/config.json' "${RELEASE_IMAGE}" --image-for=cluster-kube-apiserver-operator )
export KAO_IMAGE
if [ -z "${KAO_IMAGE}" ]; then
  # Every later podman invocation needs this image reference.
  echo "Unable to determine the cluster-kube-apiserver-operator image" >&2
  exit 1
fi
podman pull --authfile=/var/lib/kubelet/config.json "${KAO_IMAGE}"

# Runs the cluster-kube-apiserver-operator binary from KAO_IMAGE with the
# given sub-command, with /etc/kubernetes mounted from the host.
run_kao() {
  podman run -it --network=host -v /etc/kubernetes/:/etc/kubernetes/:Z --entrypoint=/usr/bin/cluster-kube-apiserver-operator "${KAO_IMAGE}" "$@"
}

# Waits until at least one CSR is Pending, then approves every pending CSR.
# $1 is only used in the progress message ("first", "second").
approve_pending_csrs() {
  until oc get csr | grep Pending; do echo "Waiting for $1 CSR request."; sleep 1; done
  for CSR in $(oc get csr | grep Pending | awk '{print $1}'); do
    oc adm certificate approve "${CSR}"
  done
}

RECOVERY_OUTPUT=$(run_kao recovery-apiserver create)
# The output is quoted so it is searched line by line: only the last
# "export KUBECONFIG=..." line is used, and text printed after it cannot end
# up in the path. tr strips the CR/LF left over from the -t pseudo terminal.
KUBECONFIG=$(printf '%s\n' "${RECOVERY_OUTPUT}" | grep "export KUBECONFIG" | tail -1 | sed "s/^.*export KUBECONFIG=//" | tr -d '\r\n')
export KUBECONFIG
if [ -z "${KUBECONFIG}" ]; then
  # With no kubeconfig the oc calls below would not target the recovery
  # API server.
  echo "Unable to find the recovery kubeconfig in the recovery-apiserver output" >&2
  exit 1
fi

#until oc get namespace kube-system 2>/dev/null 1>&2; do echo 'Waiting for recovery apiserver to come up.'; sleep 1; done
until oc get namespace kube-system ; do echo 'Waiting for recovery apiserver to come up.'; sleep 1; done

run_kao regenerate-certificates

# Force a redeployment of the control plane components; the timestamp makes
# the reason differ on every run.
for resource in kubeapiserver kubecontrollermanager kubescheduler; do
  oc patch "${resource}" cluster -p='{"spec": {"forceRedeploymentReason": "recovery-'"$( date --rfc-3339=ns )"'"}}' --type=merge
done

# Replace the kubeconfig under /etc/kubernetes and refresh the kubelet CA
# bundle from the cluster.
/usr/local/bin/recover-kubeconfig.sh >kubeconfig
mv kubeconfig /etc/kubernetes/kubeconfig
oc get configmap kube-apiserver-to-kubelet-client-ca -n openshift-kube-apiserver-operator --template='{{ index .data "ca-bundle.crt" }}' > /etc/kubernetes/kubelet-ca.crt

# NOTE(review): per the linked procedure this file makes the machine config
# daemon accept the changed certificates - confirm.
touch /run/machine-config-daemon-force

# Restart kubelet without its old client certificates and kubeconfig, and
# stop and remove all existing pods.
systemctl stop kubelet
rm -rf /var/lib/kubelet/pki /var/lib/kubelet/kubeconfig
# The pod id lists are word-split on purpose: one argument per pod.
# shellcheck disable=SC2046
crictl stopp $(sudo crictl pods -q)
# shellcheck disable=SC2046
crictl rmp $(sudo crictl pods -q)
systemctl start kubelet

# Pending CSRs are waited for and approved in two successive rounds.
approve_pending_csrs first
approve_pending_csrs second

#echo "kubeconfig: $KUBECONFIG"
run_kao recovery-apiserver destroy
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment