Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save sunk818/032f71d86c9b219d71ef7627a0fefc41 to your computer and use it in GitHub Desktop.
Save sunk818/032f71d86c9b219d71ef7627a0fefc41 to your computer and use it in GitHub Desktop.
WIP: A Google Cloud startup-script to automatically revive preemptible compute instances.
#!/bin/bash
#
# GCloud startup script to auto-restart any instances with 'revive' tag.
# The calling machine must have read/write access to the Compute Engine API.
# I use this to reboot preemptible instances.
# Output is logged to /tmp/revive.log
# Filter: copy stdin to stdout with one space prepended to every line.
indent() {
  sed -e 's/^/ /'
}
# Start every instance named in a newline-separated listing.
#
# Arguments:
#   $1 - listing text, one instance per line. On each line the first
#        whitespace-separated field is taken as the instance name and the
#        second as its zone; any further fields are ignored.
# Outputs:
#   For each usable line: the line itself, then a "Rebooting ..." message,
#   both on stdout. Lines without a zone field are reported and skipped.
revive_instances() {
  local line instance_name instance_zone

  # Read the listing one line at a time. A `for` over the quoted "$1" yields
  # a single item, which would start only the first instance in the listing.
  while IFS= read -r line; do
    instance_name=''
    instance_zone=''
    # Split on blanks explicitly; the ambient IFS may not contain a space.
    IFS=$' \t' read -r instance_name instance_zone _ <<< "$line"

    # Blank line: nothing to do.
    [[ -n "$instance_name" ]] || continue

    echo "$line"
    if [[ -z "$instance_zone" ]]; then
      echo "Skipping '$instance_name': no zone found in listing line."
      continue
    fi

    # Attempt to reboot the instance.
    echo "Rebooting '$instance_name' in zone '$instance_zone'..."
    # stdin is redirected so gcloud cannot consume the remaining listing lines.
    gcloud compute instances start "--zone=$instance_zone" "$instance_name" < /dev/null
  done <<< "$1"
}
# Poll the instance list forever and restart terminated instances.
#
# Arguments:
#   $1 - text to look for (as a literal string) in each listing row; the row
#        holds the name, zone, status and tags columns, so a match in the
#        instance name counts as well as a match in the tags
#   $2 - delay between checks, handed to `sleep`
#   $3 - log file that status messages are appended to
# Never returns: the loop runs until the process is terminated.
auto_reviver () {
  local REVIVE_TAG="$1"
  local CHECK_INTERVAL="$2"
  local LOG_FILE="$3"
  # Newline-only splitting stays in effect for the helpers called below, but
  # being local it no longer leaks into the rest of the script.
  local IFS=$'\n'
  local listing offline

  # The tag and interval are passed as printf arguments rather than embedded
  # in a date format string, so a '%' in them is logged verbatim.
  printf "%s: monitoring instances with revive tag '%s', interval %s\n" \
    "$(date +'%F %T')" "$REVIVE_TAG" "$CHECK_INTERVAL" >> "$LOG_FILE"

  while :; do
    if listing=$(gcloud compute instances list --format='table(name,zone,status,tags.list())'); then
      # Keep rows that mention the tag text and are in TERMINATED status.
      # -F/-- make grep treat the tag as plain text, never as a regex or option.
      offline=$(grep -F -- "$REVIVE_TAG" <<< "$listing" | grep "TERMINATED")
      if [[ -n "$offline" ]]; then
        # If we found some, reboot them.
        printf '%s: some instances are down.\n' "$(date +'%F %T')" >> "$LOG_FILE"
        revive_instances "$offline" | indent >> "$LOG_FILE"
      fi
    else
      # A failed listing is not the same as "nothing is down"; record it.
      printf '%s: could not list instances; retrying after the interval.\n' \
        "$(date +'%F %T')" >> "$LOG_FILE"
    fi
    # Sleep for the check interval.
    sleep "$CHECK_INTERVAL"
  done
}
# Ensure the log file exists (without truncating it) and is readable by
# general users (mode 644).
revive_log="/tmp/revive.log"
: >> "$revive_log"
chmod 644 "$revive_log"
# Start the monitor: tag "revive", one check every 120 seconds (2 minutes),
# logging to the file prepared above.
auto_reviver "revive" 120 "$revive_log"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment