ZimbiX/kube-run-pod.sh

## kube-run-pod.sh
#!/bin/bash

##################################################
# kube-run-pod
#
# This is a helper script for running a one-shot pod, typically as part of a
# deployment pipeline - e.g. to carry out a database migration.
#
# All arguments are required.
#
# This script creates the pod specified by the caller, then waits until the
# specified container has exited or a timeout elapses. Once the container has
# exited, the pod is deleted - so the pod can have sidecar containers that
# don't terminate automatically, such as cloud-sql-proxy. If the timeout
# elapses first, the pod is NOT deleted and may still be running.
#
# If we time out, the exit status of this script is 1.
# Otherwise, the exit status is the exit status of the container from the pod.
##################################################

# Strict mode, spelled out with long option names:
#   errtrace - functions and subshells inherit any ERR trap
#   errexit  - stop at the first command that fails unchecked
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails when any stage of it fails
set -o errtrace -o errexit -o nounset -o pipefail

# Announce the first phase of the run.
printf '%s\n' '--- Validating input'

# Terminate any background jobs this shell still owns (such as a command
# started with '&'). Failures are ignored on purpose: when there is nothing
# left to kill, the kill attempt fails and that is fine.
# Always returns 0.
cleanup() {
  printf '%s\n' "Cleaning up..."
  if ! jobs -p | xargs kill >/dev/null 2>&1; then
    # Best effort only; never let cleanup itself report a failure.
    return 0
  fi
}

# Run cleanup on every exit path of the script.
trap 'cleanup' EXIT

# Help text. The \n sequences are stored literally and expanded at print time.
USAGE="USAGE:\n\nkube-run-pod --pod-name=db-migration --container-name=db-migration --timeout-seconds=60 the-thing.yaml"

# Print the help text on stderr and terminate the script with status 1.
usage() {
  printf '%b\n' "${USAGE}" 1>&2
  exit 1
}

# Report a fatal error and stop.
#   $1 - message; written as-is to stderr after an "ERROR: " prefix
#        (backslash sequences are not expanded)
# Exits the script with status 1.
abort() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}

# Normalise the command line with getopt(1). Long options require the
# enhanced (util-linux) implementation.
#
# Every option takes a mandatory value (single ':'), so both
# '--pod-name=foo' and '--pod-name foo' are accepted.
#
# The assignment is tested directly by the 'if': under 'set -e' a failing
# command substitution in a plain assignment ends the script on the spot, so
# a separate '$?' check afterwards would never get to run.
if ! OPTS=$(getopt -o '' -l pod-name: -l container-name: -l timeout-seconds: -n 'parse-options' -- "$@"); then
  echo "ERROR: Failed parsing options" >&2
  echo >&2
  # usage expands the \n sequences stored in USAGE and exits with status 1.
  usage
fi

# getopt emits a shell-quoted argument list; eval turns it back into "$@".
eval set -- "$OPTS"

# Values filled in from the command line; empty means "not supplied".
POD_NAME=
CONTAINER_NAME=
TIMEOUT_SECONDS=
# Fixed start-up allowance, in seconds (not configurable from the command line).
INIT_TIMEOUT_SECONDS=60

# Walk the normalised argument list: each recognised option consumes itself
# and its value; '--' (or anything unrecognised) ends option processing.
while :; do
  case "$1" in
    --pod-name)
      POD_NAME=$2
      shift
      shift
      ;;
    --container-name)
      CONTAINER_NAME=$2
      shift
      shift
      ;;
    --timeout-seconds)
      TIMEOUT_SECONDS=$2
      shift
      shift
      ;;
    --)
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done

# Once the options have been consumed, exactly one positional argument must
# remain: the path to the pod spec.
if [[ $# -ne 1 ]]; then
  usage
fi

POD_SPEC=$1

# Every option is mandatory; an empty value counts as missing.
if [[ -z "$POD_NAME" || -z "$POD_SPEC" || -z "$CONTAINER_NAME" || -z "$TIMEOUT_SECONDS" ]]; then
  usage
fi

# The timeout is later compared numerically against the elapsed seconds. A
# non-numeric value makes that comparison error out, which the polling loop
# treats as "not timed out yet" - so the script would wait forever. Reject it
# up front instead.
if [[ ! "$TIMEOUT_SECONDS" =~ ^[0-9]+$ ]]; then
  abort "--timeout-seconds must be a non-negative integer, got '${TIMEOUT_SECONDS}'."
fi

# Make sure the file exists
if [[ ! -f "$POD_SPEC" ]]; then
  abort "pod spec file $POD_SPEC does not exist."
fi

# Echo the effective settings back so they show up in the run's output.
printf '%s\n' \
  '+++ Summary' \
  "Pod name: ${POD_NAME}" \
  "Spec file: ${POD_SPEC}" \
  "Container name: ${CONTAINER_NAME}" \
  "Timeout seconds: ${TIMEOUT_SECONDS}"

printf '%s\n' '--- Validating environment'

# The pod is deleted at the end of a successful run, so a pod with this name
# is not expected to exist when we start. If one is found anyway, carrying on
# could be unsafe: an earlier run may have failed to clean up, or another
# instance of the deploy agent may be running the same pod right now because
# concurrency groups aren't configured.
#
# Either way, stop and make a developer look at it rather than risk
# interrupting a pod that could be running a database migration.
if kubectl get pod "${POD_NAME}" >/dev/null 2>&1; then
  # Show what was found before bailing out.
  kubectl get pod "${POD_NAME}" -o wide
  abort "Found an existing pod named ${POD_NAME}; this shouldn't happen. Check the status of the pod and delete it manually if it's safe."
fi


echo '--- Running pod'

set -x

# Run the pod
kubectl apply -f "${POD_SPEC}"

# Wait until the pod is ready for us to tail its logs.
# If it's not ready after INIT_TIMEOUT_SECONDS, it's likely that it has
# ErrImagePull or CreateContainerConfigError state. In this case, we'll delete
# the pod after printing its status.
if ! kubectl wait \
    --for=condition=ContainersReady \
    --timeout="${INIT_TIMEOUT_SECONDS}s" \
    pod "${POD_NAME}"; then

  set +x

  # A container that runs to completion before the pod is ever observed as
  # ready can leave the wait above timing out even though the work is done.
  # Look for an exit code before concluding that the pod failed to start; a
  # failed lookup simply counts as "no exit code".
  earlyExitCode=$(kubectl get pod "${POD_NAME}" -o "jsonpath={..status.containerStatuses[?(@.name==\"${CONTAINER_NAME}\")].state.terminated.exitCode}" 2>/dev/null) || earlyExitCode=""

  if [[ ! "$earlyExitCode" =~ ^[0-9]+$ ]]; then
    # Output the pod status
    kubectl get pod -o yaml "${POD_NAME}" || true

    # Delete the pod - this should be safe if we haven't even successfully started it.
    kubectl delete pod "${POD_NAME}" || true

    abort "The pod was not ready after ${INIT_TIMEOUT_SECONDS} seconds"
  fi

  echo "Container ${CONTAINER_NAME} has already exited; continuing."

  # Restore tracing so the script carries on in the same state as on the
  # normal path.
  set -x
fi

echo '+++ Pod started'
# Show the state of the pod.
kubectl get pod "${POD_NAME}" -o wide

# Stream the container's logs in the background while we poll for its exit.
kubectl logs --follow "${POD_NAME}" -c "${CONTAINER_NAME}" &

# Now, we will start a loop where we keep checking for a container status code,
# while keeping an eye on the clock. If we get a status code, we break out. If
# we've hit the timeout, we abort. Otherwise, we sleep for 1 second.

set +x
# Assigning to SECONDS restarts bash's elapsed-time counter from zero.
SECONDS=0
while true; do
  # Under 'set -e' a failing kubectl call here ends the script.
  containerExitCode=$(kubectl get pod "${POD_NAME}" -o "jsonpath={..status.containerStatuses[?(@.name==\"${CONTAINER_NAME}\")].state.terminated.exitCode}")

  # The above JSON path query will mostly return an empty string until the
  # container has a status code, but there's a short window of time between
  # the pod being marked Completed and the containerStatus block being fully
  # populated where we'll start getting a newline instead of an empty string.
  #
  # To handle this, we go ahead and check for something that actually looks
  # like an exit code.
  if [[ "$containerExitCode" =~ ^[0-9]+$ ]]; then
    break
  # -ge rather than -gt: give up as soon as the configured number of seconds
  # has elapsed, not one second later.
  elif [ "$SECONDS" -ge "$TIMEOUT_SECONDS" ]; then
    echo -e '\nPod status:\n'
    kubectl get pod "${POD_NAME}" -o wide || true
    echo -e '\n=== BEWARE ===\n' >&2
    echo 'kube-run-pod does NOT automatically kill or cleanup the migration pod when we time out.' >&2
    echo 'It may still be running.' >&2
    abort "Timed out after ${TIMEOUT_SECONDS} seconds."
  else
    sleep 1
  fi
done

printf '%s\n' "Container exited with status code: ${containerExitCode}"

# Block until the background jobs (the log follower) have finished, so that
# no trailing log lines are lost.
wait

printf '%s\n' '--- cleaning up'

set -x

# Remove the pod now that the container of interest is done.
kubectl delete pod "${POD_NAME}"

# Propagate the container's exit status as our own.
exit "${containerExitCode}"
	#!/bin/bash

	##################################################
	# kube-run-pod
	#
	# This is a helper script for running a one-shot pod, typically as part of a
	# deployment pipeline - e.g. to carry out a database migration.
	#
	# All arguments are required.
	#
	# This script creates the pod specified by the caller, then waits until the
	# specified container has exited or a timeout elapses. Once the container has
	# exited, the pod is deleted - so the pod can have sidecar containers that
	# don't terminate automatically, such as cloud-sql-proxy. If the timeout
	# elapses first, the pod is NOT deleted and may still be running.
	#
	# If we time out, the exit status of this script is 1.
	# Otherwise, the exit status is the exit status of the container from the pod.
	##################################################

	# Strict mode, spelled out with long option names:
	#   errtrace - functions and subshells inherit any ERR trap
	#   errexit  - stop at the first command that fails unchecked
	#   nounset  - expanding an unset variable is an error
	#   pipefail - a pipeline fails when any stage of it fails
	set -o errtrace -o errexit -o nounset -o pipefail

	# Announce the first phase of the run.
	printf '%s\n' '--- Validating input'

	# Terminate any background jobs this shell still owns (such as a command
	# started with '&'). Failures are ignored on purpose: when there is nothing
	# left to kill, the kill attempt fails and that is fine.
	# Always returns 0.
	cleanup() {
	  echo "Cleaning up..."
	  # A real pipeline and OR-list: the job PIDs are fed to kill, and any
	  # failure of that best-effort kill is swallowed.
	  jobs -p | xargs kill &>/dev/null || true
	}

	# Run cleanup on every exit path of the script.
	trap cleanup EXIT

	# Help text. The \n sequences are stored literally and expanded at print time.
	USAGE="USAGE:\n\nkube-run-pod --pod-name=db-migration --container-name=db-migration --timeout-seconds=60 the-thing.yaml"

	# Print the help text on stderr and terminate the script with status 1.
	usage() {
	  printf '%b\n' "${USAGE}" 1>&2
	  exit 1
	}

	# Report a fatal error and stop.
	#   $1 - message; written as-is to stderr after an "ERROR: " prefix
	#        (backslash sequences are not expanded)
	# Exits the script with status 1.
	abort() {
	  printf 'ERROR: %s\n' "$1" >&2
	  exit 1
	}

	# Normalise the command line with getopt(1). Long options require the
	# enhanced (util-linux) implementation.
	#
	# Every option takes a mandatory value (single ':'), so both
	# '--pod-name=foo' and '--pod-name foo' are accepted.
	#
	# The assignment is tested directly by the 'if': under 'set -e' a failing
	# command substitution in a plain assignment ends the script on the spot, so
	# a separate '$?' check afterwards would never get to run.
	if ! OPTS=$(getopt -o '' -l pod-name: -l container-name: -l timeout-seconds: -n 'parse-options' -- "$@"); then
	  echo "ERROR: Failed parsing options" >&2
	  echo >&2
	  # usage expands the \n sequences stored in USAGE and exits with status 1.
	  usage
	fi

	# getopt emits a shell-quoted argument list; eval turns it back into "$@".
	eval set -- "$OPTS"

	# Values filled in from the command line; empty means "not supplied".
	POD_NAME=
	CONTAINER_NAME=
	TIMEOUT_SECONDS=
	# Fixed start-up allowance, in seconds (not configurable from the command line).
	INIT_TIMEOUT_SECONDS=60

	# Walk the normalised argument list: each recognised option consumes itself
	# and its value; '--' (or anything unrecognised) ends option processing.
	while :; do
	  case "$1" in
	    --pod-name)
	      POD_NAME=$2
	      shift
	      shift
	      ;;
	    --container-name)
	      CONTAINER_NAME=$2
	      shift
	      shift
	      ;;
	    --timeout-seconds)
	      TIMEOUT_SECONDS=$2
	      shift
	      shift
	      ;;
	    --)
	      shift
	      break
	      ;;
	    *)
	      break
	      ;;
	  esac
	done

	# Once the options have been consumed, exactly one positional argument must
	# remain: the path to the pod spec.
	if [[ $# -ne 1 ]]; then
	  usage
	fi

	POD_SPEC=$1

	# Every option is mandatory; an empty value counts as missing. The four
	# checks are joined with a real logical OR so that any one of them being
	# empty triggers the usage message.
	if [[ -z "$POD_NAME" || -z "$POD_SPEC" || -z "$CONTAINER_NAME" || -z "$TIMEOUT_SECONDS" ]]; then
	  usage
	fi

	# The timeout is later compared numerically against the elapsed seconds. A
	# non-numeric value makes that comparison error out, which the polling loop
	# treats as "not timed out yet" - so the script would wait forever. Reject it
	# up front instead.
	if [[ ! "$TIMEOUT_SECONDS" =~ ^[0-9]+$ ]]; then
	  abort "--timeout-seconds must be a non-negative integer, got '${TIMEOUT_SECONDS}'."
	fi

	# Make sure the file exists
	if [[ ! -f "$POD_SPEC" ]]; then
	  abort "pod spec file $POD_SPEC does not exist."
	fi

	# Echo the effective settings back so they show up in the run's output.
	printf '%s\n' \
	  '+++ Summary' \
	  "Pod name: ${POD_NAME}" \
	  "Spec file: ${POD_SPEC}" \
	  "Container name: ${CONTAINER_NAME}" \
	  "Timeout seconds: ${TIMEOUT_SECONDS}"

	printf '%s\n' '--- Validating environment'

	# The pod is deleted at the end of a successful run, so a pod with this name
	# is not expected to exist when we start. If one is found anyway, carrying on
	# could be unsafe: an earlier run may have failed to clean up, or another
	# instance of the deploy agent may be running the same pod right now because
	# concurrency groups aren't configured.
	#
	# Either way, stop and make a developer look at it rather than risk
	# interrupting a pod that could be running a database migration.
	if kubectl get pod "${POD_NAME}" >/dev/null 2>&1; then
	  # Show what was found before bailing out.
	  kubectl get pod "${POD_NAME}" -o wide
	  abort "Found an existing pod named ${POD_NAME}; this shouldn't happen. Check the status of the pod and delete it manually if it's safe."
	fi


	echo '--- Running pod'

	set -x

	# Run the pod
	kubectl apply -f "${POD_SPEC}"

	# Wait until the pod is ready for us to tail its logs.
	# If it's not ready after INIT_TIMEOUT_SECONDS, it's likely that it has
	# ErrImagePull or CreateContainerConfigError state. In this case, we'll delete
	# the pod after printing its status.
	if ! kubectl wait \
	    --for=condition=ContainersReady \
	    --timeout="${INIT_TIMEOUT_SECONDS}s" \
	    pod "${POD_NAME}"; then

	  set +x

	  # A container that runs to completion before the pod is ever observed as
	  # ready can leave the wait above timing out even though the work is done.
	  # Look for an exit code before concluding that the pod failed to start; a
	  # failed lookup simply counts as "no exit code".
	  earlyExitCode=$(kubectl get pod "${POD_NAME}" -o "jsonpath={..status.containerStatuses[?(@.name==\"${CONTAINER_NAME}\")].state.terminated.exitCode}" 2>/dev/null) || earlyExitCode=""

	  if [[ ! "$earlyExitCode" =~ ^[0-9]+$ ]]; then
	    # Output the pod status (best effort: a failure here must not mask the
	    # abort below).
	    kubectl get pod -o yaml "${POD_NAME}" || true

	    # Delete the pod - this should be safe if we haven't even successfully started it.
	    kubectl delete pod "${POD_NAME}" || true

	    abort "The pod was not ready after ${INIT_TIMEOUT_SECONDS} seconds"
	  fi

	  echo "Container ${CONTAINER_NAME} has already exited; continuing."

	  # Restore tracing so the script carries on in the same state as on the
	  # normal path.
	  set -x
	fi

	echo '+++ Pod started'
	# Show the state of the pod.
	kubectl get pod "${POD_NAME}" -o wide

	# Stream the container's logs in the background while we poll for its exit.
	kubectl logs --follow "${POD_NAME}" -c "${CONTAINER_NAME}" &

	# Now, we will start a loop where we keep checking for a container status code,
	# while keeping an eye on the clock. If we get a status code, we break out. If
	# we've hit the timeout, we abort. Otherwise, we sleep for 1 second.

	set +x
	# Assigning to SECONDS restarts bash's elapsed-time counter from zero.
	SECONDS=0
	while true; do
	  # Under 'set -e' a failing kubectl call here ends the script.
	  containerExitCode=$(kubectl get pod "${POD_NAME}" -o "jsonpath={..status.containerStatuses[?(@.name==\"${CONTAINER_NAME}\")].state.terminated.exitCode}")

	  # The above JSON path query will mostly return an empty string until the
	  # container has a status code, but there's a short window of time between
	  # the pod being marked Completed and the containerStatus block being fully
	  # populated where we'll start getting a newline instead of an empty string.
	  #
	  # To handle this, we go ahead and check for something that actually looks
	  # like an exit code.
	  if [[ "$containerExitCode" =~ ^[0-9]+$ ]]; then
	    break
	  # -ge rather than -gt: give up as soon as the configured number of seconds
	  # has elapsed, not one second later.
	  elif [ "$SECONDS" -ge "$TIMEOUT_SECONDS" ]; then
	    echo -e '\nPod status:\n'
	    # Best effort: a failure to print the status must not mask the abort.
	    kubectl get pod "${POD_NAME}" -o wide || true
	    echo -e '\n=== BEWARE ===\n' >&2
	    echo 'kube-run-pod does NOT automatically kill or cleanup the migration pod when we time out.' >&2
	    echo 'It may still be running.' >&2
	    abort "Timed out after ${TIMEOUT_SECONDS} seconds."
	  else
	    sleep 1
	  fi
	done

	printf '%s\n' "Container exited with status code: ${containerExitCode}"

	# Block until the background jobs (the log follower) have finished, so that
	# no trailing log lines are lost.
	wait

	printf '%s\n' '--- cleaning up'

	set -x

	# Remove the pod now that the container of interest is done.
	kubectl delete pod "${POD_NAME}"

	# Propagate the container's exit status as our own.
	exit "${containerExitCode}"