Skip to content

Instantly share code, notes, and snippets.

@KylePDavis
Created March 19, 2015 15:08
Show Gist options
  • Save KylePDavis/3f8c511838a36f2528d7 to your computer and use it in GitHub Desktop.
Save KylePDavis/3f8c511838a36f2528d7 to your computer and use it in GitHub Desktop.
Self-contained auto-updating rsync-based backup script from a lifetime ago used to cross-sync data to local backup servers and then remote backup servers
#!/bin/sh
# Description:
# Automated backup script designed to support multiple backup servers.
#
# ChangeLog:
# 20130822 kdavis - Updated to use fully qualified hostname
# 20120125 kdavis - Minor updates for mysqldump over ssh so it can use the same security model as rsync, setting env vars from command line, and logging
# 20080610 kdavis - Added missing rsync error code handling to one of the backup methods that uses rsync internally
# 20080123 kdavis - Added more debugging; and...
# 20080119 kdavis - Added lock files/PID checks to ensure only one instance
# 20070911 kdavis - Finished initial version
# 20070820 kdavis - Started initial version
###############################################################################
# Set this to "true" to enable debugging messages, "false" to disable them; e.g., DEBUG=true ./backup.sh
# Set EXEC to "true" for normal operation, "false" to avoid actually running any commands that change things; e.g., EXEC=false ./backup.sh
#
# Both values are later executed as commands (e.g. "$DEBUG && ..."), so they are
# normalized here to exactly "true" or "false" instead of trusting whatever the
# environment supplied:
#   - DEBUG: unset/empty or anything other than "true" means "false"
#   - EXEC:  unset/empty means "true"; anything else other than "true" means
#            "false", so a mistyped value results in a harmless dry run
case "$DEBUG" in
	true) ;;
	*) DEBUG=false ;;
esac
case "$EXEC" in
	true|"") EXEC=true ;;
	*) EXEC=false ;;
esac
###############################################################################
# JOBS - Backup jobs are defined here and then processed (in order)
###############################################################################
# JOBS LIST FORMAT:
# NAME
# - Unique name; for logging and so that you can tell the difference between similar jobs
# HOST
# - Host or IP of the machine getting backed up (i.e., the server that the primary backup server will pull data from)
# TYPE
# - Type of backup, which will be supported by a section in this script (See SUPPORTED BACKUP TYPES)
# WHEN
# - What day of the week to run this backup (0-6, representing Sunday through Saturday, or * for daily)
# ARCHIVES
# - The number of archived copies of the backup to keep around
# BACKUP_SERVERS
# - Space separated list of servers to backup to, with an optional "@KB" at the end, where KB is the bandwidth limit
# - The first server will be the initiator of all of the backups from $HOST, and then it will push to the other servers listed (if any)
# - Ideally, this first server will be the one with the fastest network connection to $HOST.
#
# SUPPORTED BACKUP TYPES:
# rsync
# - Simple file backups using rsync (creates an exact, uncompressed copy on server)
# - We've specified the -F option in $RSYNC_OPTIONS, so you may provide a "/.rsync-filter" file on $HOST
# mysqldump
# - Text dump of all MySQL databases using mysqldump
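# - Optionally you may pass a comma separated list of databases in parentheses (MUST NOT CONTAIN SPACES)
# NOTE(review): the job loop in this file only matches the bare type name "mysqldump", so a
# type with a parenthesized database list appears to be treated as an unknown type -- confirm before relying on it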
#
# NOTES:
# The point of having this format in one centralized file is that it should
# allow for much better control over job scheduling, making it easier to avoid
# annoying scheduling conflicts.
#
# IMPORTANT:
# This format is primitive, so the values for each field must not contain spaces,
# otherwise the "parser" will explode (except for BACKUP_SERVERS).
###############################################################################
# The jobs list itself: one job per line, fields separated by whitespace in the
# order given by the header line below. Blank lines and lines whose first
# non-blank character is "#" (inside the quoted string) are dropped before the
# list is parsed. A job is only run by the host named first in BACKUP_SERVERS.
JOBS='
# NAME HOST TYPE WHEN ARCHIVES BACKUP_SERVERS
Source_Files source.example.com rsync * 7 backups.example.com backups.remote.example.com
'
# TODO: the mysqldump appears to be working differently than the rsync...look into it, fix it, automate it, and copy tarballs to the windows backup directory...
###############################################################################
# SERVERS - A list of backup servers
###############################################################################
# Each server will run this script, and each will be checked for an updated
# version of this script before proceeding to allow for automatic updates
# between backup servers.
###############################################################################
# Whitespace-separated list of backup servers (one per line here). The output of
# `hostname -f` on the machine running this script must appear in this list,
# otherwise the script refuses to run; every other entry is contacted over scp
# to look for a newer copy of this script.
SERVERS='
backups.rd.rcg.local
10.0.12.99
'
###############################################################################
# Quick and dirty way to setup the SSH keys
###############################################################################
# Sync the /etc/ssh/ directory from one backup server to the other backup servers.
#
# On each backup server, make sure that the root user will accept its own key:
# cat /etc/ssh/ssh_host_rsa_key.pub >> /root/.ssh/authorized_keys2
# for HOST in `hostname` BACKUP_SERVER2 BACKUP_SERVER3; do
# ssh -i /etc/ssh/ssh_host_rsa_key $HOST
# done
#
# On each backup host, make sure that the root user will accept the backup server keys.
# for HOST in BACKUP_CLIENT1 BACKUP_CLIENT2; do
# cat /etc/ssh/ssh_host_rsa_key.pub | ssh root@$HOST "cat >> .ssh/authorized_keys2"
# done
#
# Also, you can do this a little differently if you tweak the variables below.
###############################################################################
# This is the directory where backups are stored on the backup servers
SERVERS_BACKUP_DIR=/backups
# The user with rights to back up things on each host, using SSH keys or IP based privileges (for MySQL)
BACKUP_USER=root
# Global options for the commands used in the different backup types
SSH_OPTIONS="-i /root/.ssh/id_ecdsa"
RSYNC_OPTIONS="-vazSHF --partial --delete-during --delete-excluded --stats"
# In debug mode, also have rsync itemize every change it makes.
# $DEBUG is quoted so that an empty value is a clean "no" rather than a test syntax error.
if [ "$DEBUG" = "true" ]; then
	RSYNC_OPTIONS="--itemize-changes $RSYNC_OPTIONS"
fi
MYSQL_OPTIONS="--defaults-file=/etc/mysql/debian.cnf"
# Some mysqldump options for flexibility
MYSQLDUMP_OPTIONS="$MYSQL_OPTIONS --opt --single-transaction --quick -v"
# Command that the dump output is piped through, and the matching file extension
MYSQLDUMP_PIPE="gzip -c -"
MYSQLDUMP_EXT="sql.gz"
# Some tar options used for archiving things
TAR_OPTIONS='-z'
TAR_EXT='tar.gz'
# Setting and exporting RSYNC_RSH is required to make rsync use ssh with the key given in $SSH_OPTIONS
RSYNC_RSH="ssh $SSH_OPTIONS"
export RSYNC_RSH
###############################################################################
# Now for the core of the script (you probably won't need to tweak this)
###############################################################################
# Fully qualified name of this machine; it is compared against the server names
# in the servers list and in each job to decide what runs here
THIS_HOST=$(hostname -f)
# Today's weekday as a number (Sunday is 0), matched against each job's WHEN field
WDAY_TODAY=$(date +%w)
# How this script was invoked, its file name, and the absolute directory it lives in
CMD=$0
CMD_NAME=$(basename "$CMD")
CMD_DIR=$(cd "$(dirname "$CMD")" && pwd)
# With no mysqldump pipe compressor configured, pass the dump through cat unchanged
if [ -z "$MYSQLDUMP_PIPE" ]; then
	MYSQLDUMP_PIPE="cat"
fi
# Print a log line to stdout.
# Arguments: the words of the message (joined with spaces)
# Output:    "<current date>: <this host>: <message>"
logmsg(){
	echo "$(date): $THIS_HOST: $*"
}
# Print a debugging line to stdout, but only while $DEBUG is "true".
# Arguments: the words of the message (joined with spaces)
# Output:    "DEBUG: <current date>: <this host>: <message>"
# Returns:   non-zero (the status of running $DEBUG) when debugging is off
debugmsg(){
	$DEBUG || return
	echo "DEBUG: $(date): $THIS_HOST: $*"
}
# A dry run (EXEC=false) forces debugging on and announces itself
if ! $EXEC; then
	DEBUG=true
	debugmsg "*** COMMAND EXECUTION HAS BEEN DISABLED ***"
fi
# Use and check a PID-based lock file to ensure only one instance runs at a time
debugmsg "Checking to make sure that there isn't another instance running already."
$DEBUG && ps fauxww | sed 's/^/DEBUG: /'
# The lock file is named after this script. (This previously used $BASENAME,
# which is never set anywhere, so the lock file ended up as "/var/run/.pid".)
LOCKFILE="/var/run/$CMD_NAME.pid"
if [ -f "$LOCKFILE" ]; then
	LOCKPID=$(cat "$LOCKFILE")
	# The lock only counts if it names a PID that still exists AND that process
	# was started from a script with our name; anything else is a stale lock.
	# An empty PID is treated as stale ("/proc/" itself always exists).
	if [ -n "$LOCKPID" ] && [ -d "/proc/$LOCKPID" ] && grep -q "/$CMD_NAME\$" "/proc/$LOCKPID/cmdline"; then
		logmsg "There is currently another copy still running with a PID of $LOCKPID. Only one instance is allowed!"
		exit 128
	else
		debugmsg "-- A lock file for previous instance with PID of $LOCKPID was found, but it was stale, so it was ignored."
	fi
fi
# Record our own PID; failing to do so is reported but is not fatal
echo $$ > "$LOCKFILE" || logmsg "Unable to setup lock file. Multiple instance checking will not work!"
# First, we sanity check that this host is in the servers list
# This will guarantee us that we'll have at least one script to check against in the autoupdate code
# The comparison is an exact, whole-name match: a regex/substring match would
# also accept partial host names and an empty $THIS_HOST.
THIS_HOST_LISTED=false
for SERVER in $SERVERS; do
	if [ "$SERVER" = "$THIS_HOST" ]; then
		THIS_HOST_LISTED=true
	fi
done
if ! $THIS_HOST_LISTED; then
	logmsg "This host ($THIS_HOST) is not listed in the servers list: $SERVERS"
	logmsg "Refusing to run on a host that is not listed in the servers list."
	exit 255
fi
# Then, we go through the autoupdate code
debugmsg "Automatically updating to the latest version of this script."
# A --just-autoupdated argument means we were just restarted from a freshly
# installed copy, so there is no need to search again
if [ "$1" = "--just-autoupdated" ]; then
	logmsg "Restarted using updated version."
# The downloaded copies may end up being executed, so keep them in a private
# directory from mktemp rather than under predictable names in /tmp
elif ! UPDATE_DIR=$(mktemp -d "/tmp/$CMD_NAME-autoupdate.XXXXXX"); then
	logmsg "-- ERROR creating a temporary directory; skipping the search for updated versions."
else
	# Go through servers and grab all of the copies of this backup script and track the newest one
	NEWEST="$0"
	NEWEST_SERVER=""
	logmsg "Searching for updated versions of the backup script ..."
	for SERVER in $SERVERS; do
		logmsg "-- Getting backup script from $SERVER ..."
		# There is nothing to fetch from ourselves
		[ "$SERVER" != "$THIS_HOST" ] || continue
		TMPFILE="$UPDATE_DIR/autoupdate_$SERVER"
		# Get the file from the server (-p keeps its modification time for the -nt test below)
		scp -qp $SSH_OPTIONS "$BACKUP_USER@$SERVER:$CMD_DIR/$CMD_NAME" "$TMPFILE" 2>&1
		# Make sure that we got a file
		if [ -s "$TMPFILE" ]; then
			# Then check to see if it's newer than what we've got
			if [ "$TMPFILE" -nt "$NEWEST" ]; then
				$DEBUG && debugmsg "$TMPFILE is newer than $NEWEST:" && ls -alhrt "$TMPFILE" "$NEWEST" | sed 's/^/DEBUG: /'
				NEWEST=$TMPFILE
				NEWEST_SERVER=$SERVER
			fi
		else
			logmsg "---- ERROR getting backup script from $SERVER!"
		fi
	done
	# If we found a script that's newer than ours, then install it over our current version and restart
	if [ "$NEWEST" != "$0" ]; then
		logmsg "-- Restarting using an updated version from $NEWEST_SERVER ..."
		# Keep a backup, stage the new copy next to the script, then rename it
		# into place. The rename never rewrites the file this shell is still
		# reading, and a failed copy leaves the current script untouched.
		# cp -p keeps the modification time, so the installed copy does not
		# look newer than the one it came from on the next run elsewhere.
		if cp -p "$0" "$0.bak" && cp -p "$NEWEST" "$0.new" && mv "$0.new" "$0"; then
			rm -rf "$UPDATE_DIR"
			debugmsg "Differences between old version and new version:"
			$DEBUG && diff -u "$0.bak" "$0" | sed 's/^/DEBUG: /'
			# The restarted copy sets up its own lock file
			rm "$LOCKFILE" 2>/dev/null
			exec "$0" --just-autoupdated
		else
			logmsg "---- ERROR installing the updated version from $NEWEST_SERVER; continuing with the current version."
			rm -f "$0.new"
		fi
	else
		logmsg "-- This version of the backup script appears to be up to date."
	fi
	# Otherwise we have the most recent version, so we'll just clean up and continue
	rm -rf "$UPDATE_DIR"
fi
###############################################################################
# Helpers for the main loop
###############################################################################
# Run the given command with its stdout and stderr both shown and appended to
# $JOB_LOG, and return the command's own exit status. A plain "cmd | tee"
# reports tee's status instead, which hides failures of the command.
# Arguments: the command and its arguments
run_logged(){
	{
		# fd 3 carries the exit status into the command substitution, while
		# fd 4 lets tee keep writing to our real stdout
		RUN_LOGGED_STATUS=$( { { "$@" 2>&1; echo $? >&3; } | tee -a "$JOB_LOG" >&4; } 3>&1 )
	} 4>&1
	return "${RUN_LOGGED_STATUS:-1}"
}
# rsync $BACKUP_SRC to $BACKUP_DST using $RSYNC_OPTIONS and $BWLIMIT, logging to $JOB_LOG.
# Does nothing when $EXEC is false.
# Returns: 0 on success, or when rsync only reported vanished source files
#          (status 24, logged as a warning); 1 on any other rsync failure
sync_with_rsync(){
	$EXEC || return 0
	debugmsg "EXEC: rsync $RSYNC_OPTIONS --bwlimit=$BWLIMIT \"$BACKUP_SRC\" \"$BACKUP_DST\" 2>&1" | tee -a "$JOB_LOG"
	run_logged rsync $RSYNC_OPTIONS --bwlimit=$BWLIMIT "$BACKUP_SRC" "$BACKUP_DST"
	# Go through the return code from rsync and warn or error depending on what happened
	case $? in
		0) # Everything was fine
			;;
		24) # Warnings
			logmsg "---- WARNING: Partial transfer due to vanished source files." | tee -a "$JOB_LOG"
			;;
		*) # Errors (the output message is handled by the caller of the job)
			return 1
			;;
	esac
	return 0
}
# Dump one database from $HOST through $MYSQLDUMP_PIPE into $BACKUP_DST.
# Arguments: $1 - database name
# Returns:   0 only if both mysqldump (over ssh) and the pipe command succeeded;
#            checking the pipeline alone would only see the pipe command's status
dump_database(){
	DUMP_STATUS=$( { { ssh $SSH_OPTIONS $BACKUP_USER@$HOST mysqldump $MYSQLDUMP_OPTIONS "$1" 2>>"$JOB_LOG"; echo $? >&3; } | $MYSQLDUMP_PIPE >"$BACKUP_DST/$1.$MYSQLDUMP_EXT"; } 3>&1 ) || return 1
	[ "$DUMP_STATUS" = "0" ]
}
###############################################################################
# Main loop
###############################################################################
# Take the jobs list, scrub out the comments and empty lines, and then pass it on to the main loop
# The list is fed in through a here-document rather than a pipeline so that the
# loop runs in this shell and $ERRORS is still set when it is checked at the end.
ERRORS=0
JOB_LINES=$(echo "$JOBS" | grep -v '^[[:space:]]*#' | grep .)
while read -r NAME HOST TYPE WHEN ARCHIVES MAIN_BACKUP_SERVER_STR OTHER_BACKUP_SERVER_STRS; do
	# An empty jobs list still produces one blank here-document line
	[ -n "$NAME" ] || continue
	# The first backup server (minus any "@KB" bandwidth suffix) is the one that runs the job
	MAIN_BACKUP_SERVER=${MAIN_BACKUP_SERVER_STR%%@*}
	BACKUP_SERVER_STRS="$MAIN_BACKUP_SERVER_STR $OTHER_BACKUP_SERVER_STRS"
	# Do we handle this job?
	# (Skipped jobs are only reported on stdout; no job log has been set up for them.)
	if [ "$THIS_HOST" != "$MAIN_BACKUP_SERVER" ]; then
		logmsg "Skipping backup job \"$NAME\" for host \"$HOST\" because it runs elsewhere."
		continue
	fi
	# Do we need to run this job today?
	if [ "$WDAY_TODAY" != "$WHEN" ] && [ "$WHEN" != "*" ]; then
		logmsg "Skipping backup job \"$NAME\" for host \"$HOST\" because it doesn't run today."
		continue
	fi
	# Setup a few variables
	ARCHIVE_PATH="$SERVERS_BACKUP_DIR/archives/$HOST/$TYPE"
	BACKUP_PATH="$SERVERS_BACKUP_DIR/working/$HOST/$TYPE/" # For rsync, this must end with a slash
	LOGS_PATH="$SERVERS_BACKUP_DIR/logs/$HOST/$TYPE"
	JOB_LOG="$LOGS_PATH/$NAME.log"
	# Never carry a database list over from a previous job
	DATABASES=""
	# The job log lives on this host, so its directory must exist before the first message is written
	mkdir -p "$LOGS_PATH"
	# Gzip the last job log so that we keep an extra log around, and then nuke the old one.
	# This happens before anything is logged for this run, and -f lets the
	# new .gz replace the one kept from the run before.
	$EXEC && gzip -f "$JOB_LOG" 2>/dev/null
	$EXEC && rm "$JOB_LOG" 2>/dev/null
	logmsg "Running backup job \"$NAME\" for host \"$HOST\" ..." | tee -a "$JOB_LOG"
	# Setup any globals that might get shared between jobs
	if [ "$TYPE" = "mysqldump" ]; then
		# For the mysqldump that means getting a list of databases from $HOST
		logmsg "-- Getting list of databases from $HOST ..." | tee -a "$JOB_LOG"
		# If that fails then dumping the databases will too, so output an error and continue to the next job
		if ! DATABASES_STR=$(echo SHOW DATABASES | ssh $SSH_OPTIONS $BACKUP_USER@$HOST mysql $MYSQL_OPTIONS); then
			logmsg "---- ERROR while getting list of databases from $HOST!" | tee -a "$JOB_LOG"
			ERRORS=1
			continue
		fi
		# If it didn't fail, then we trim the first line using tail to get our list of database names
		DATABASES=$(echo "$DATABASES_STR" | tail -n +2)
	fi
	# Go through the list of backup servers for this backup job
	for BACKUP_SERVER_STR in $BACKUP_SERVER_STRS; do
		# Parse out any bandwidth option attached to the server name.
		# Everything for this server runs in a subshell so that "exit" aborts
		# only this server's work and is reported through the subshell's status.
		echo "$BACKUP_SERVER_STR" | tr '@' ' ' | (
			read -r BACKUP_SERVER BWLIMIT
			# Setup a few variables used below, and throw in a little sanity checking when needed
			if [ "x$MAIN_BACKUP_SERVER" = "x" ]; then
				# The main backup server is the first in the list of backup servers for a job
				# It is also the initiator of the backup for all of the others listed in that job
				MAIN_BACKUP_SERVER=$BACKUP_SERVER
			fi
			if [ "x$BWLIMIT" = "x" ]; then
				# For rsync, --bwlimit=0 means no limit
				BWLIMIT=0
			fi
			if [ "$BACKUP_SERVER" = "$MAIN_BACKUP_SERVER" ]; then
				# If this is the main backup server (eg $THIS_HOST), then copy from $HOST
				BACKUP_SRC=$BACKUP_USER@$HOST:/
				BACKUP_DST=$BACKUP_PATH
			else
				# Since we already have the latest copy, then we just sync it outwards to the others.
				BACKUP_SRC=$BACKUP_PATH
				BACKUP_DST=$BACKUP_USER@$BACKUP_SERVER:$BACKUP_PATH
			fi
			# and we might as well ensure that the directory structure is here while we're at it
			ssh $SSH_OPTIONS $BACKUP_USER@$BACKUP_SERVER "mkdir -p $BACKUP_PATH $ARCHIVE_PATH $LOGS_PATH 2>&1" 2>&1 || exit 911
			# Do the appropriate action for this backup job's type (and destination / backup server)
			case $TYPE in
				# This section handles rsync backup jobs
				rsync)
					logmsg "-- Syncing from $BACKUP_SRC to $BACKUP_DST ..." | tee -a "$JOB_LOG"
					sync_with_rsync || exit 911
					;;
				# This section handles mysqldump backup jobs
				# NOTE(review): "pg_dumpall" has always run exactly the same mysqldump
				# code as "mysqldump", and no database list is ever fetched for it, so
				# on the main backup server it dumps nothing. It is kept as-is here;
				# a real pg_dumpall implementation is still needed.
				mysqldump|pg_dumpall)
					# TODO/BUG: mysqldump doesn't have a bandwidth limit, so any limits are ignored below
					# -- one possible solution is to pipe it through ssh and set the bandwidth limit via ssh options, but who knows...
					# If this is the main backup server (eg $THIS_HOST), then copy from $HOST
					if [ "$BACKUP_SERVER" = "$MAIN_BACKUP_SERVER" ]; then
						# NOTE: DATABASES is set above because it needed to be a global so that subsequent
						# backup servers listed for this job can use the list without having to re-query for it
						for DATABASE in $DATABASES; do
							logmsg "-- Dumping database $DATABASE from $HOST to $BACKUP_DST ..." | tee -a "$JOB_LOG"
							if $EXEC; then
								debugmsg "EXEC: ssh $SSH_OPTIONS $BACKUP_USER@$HOST mysqldump $MYSQLDUMP_OPTIONS \"$DATABASE\" 2>>$JOB_LOG | $MYSQLDUMP_PIPE >$BACKUP_DST/$DATABASE.$MYSQLDUMP_EXT || exit 911"
								dump_database "$DATABASE" || exit 911
							fi
						done
					else
						logmsg "-- Syncing database dumps from $BACKUP_SRC to $BACKUP_DST ..." | tee -a "$JOB_LOG"
						sync_with_rsync || exit 911
					fi
					;;
				# Also, we'll complain if somebody gave us a backup job type that we don't understand
				*)
					logmsg "-- No action taken for unknown backup job type \"$TYPE\"!" | tee -a "$JOB_LOG"
					;;
			esac
			# Archive the latest backup
			# NOTE: This is done afterwards because the archiving will generally take
			# far longer than the actual backup process, and the archiving won't be
			# a performance nuisance during the middle of the day since it will only
			# affect the backup server(s), while the backup process affects the other
			# servers directly and could be a problem during a normal business day.
			# TODO: Two things can be done to make this process even more efficient
			# Archive for all only after the backup jobs have completed (it *IS* wasteful to archive the most recent backup, however, easier on the network and client machines)
			# Archive only once:
			# Rotate archives on BACKUP_SERVER
			# if BACKUP_SERVER == MAIN_BACKUP_SERVER
			# Create archive
			# else
			# scp MAIN_BACKUP_SERVER:archive
			if [ "$ARCHIVES" -gt "0" ]; then
				ARCHIVE_NAME=$ARCHIVE_PATH/$HOST
				# Rotate older archives: build one command string that renames
				# N-1 -> N, ..., 0 -> 1 and finally the unnumbered archive -> 0,
				# then run it on the backup server in a single ssh call
				logmsg "-- Rotating archives under $BACKUP_SERVER:$ARCHIVE_PATH" | tee -a "$JOB_LOG"
				ROTATE_COMMANDS="true" # This must be "true" or some other command that effectively does nothing
				COUNT=$ARCHIVES
				while [ "$COUNT" -gt "-1" ]; do
					NEW_COUNT=$(( $COUNT - 1 ))
					if [ "$NEW_COUNT" = "-1" ]; then
						OLD_ARCHIVE_NAME=$ARCHIVE_NAME.$TAR_EXT
					else
						OLD_ARCHIVE_NAME=$ARCHIVE_NAME.$NEW_COUNT.$TAR_EXT
					fi
					NEW_ARCHIVE_NAME=$ARCHIVE_NAME.$COUNT.$TAR_EXT
					ROTATE_COMMANDS="$ROTATE_COMMANDS; [ -f $OLD_ARCHIVE_NAME ] && mv $OLD_ARCHIVE_NAME $NEW_ARCHIVE_NAME 2>&1"
					# Update count
					COUNT=$NEW_COUNT
				done
				if $EXEC; then
					debugmsg "EXEC: ssh $SSH_OPTIONS $BACKUP_USER@$BACKUP_SERVER \"$ROTATE_COMMANDS\" 2>&1" | tee -a "$JOB_LOG"
					ssh $SSH_OPTIONS $BACKUP_USER@$BACKUP_SERVER "$ROTATE_COMMANDS" 2>&1
				fi
				# Create new archive of the current backup directory
				logmsg "-- Archiving backup at $BACKUP_DST to $ARCHIVE_PATH" | tee -a "$JOB_LOG"
				if $EXEC; then
					debugmsg "EXEC: ssh $SSH_OPTIONS $BACKUP_USER@$BACKUP_SERVER \"tar $TAR_OPTIONS -cf $ARCHIVE_NAME.$TAR_EXT -C $BACKUP_PATH ./ 2>&1\" 2>&1" | tee -a "$JOB_LOG"
					ssh $SSH_OPTIONS $BACKUP_USER@$BACKUP_SERVER "tar $TAR_OPTIONS -cf $ARCHIVE_NAME.$TAR_EXT -C $BACKUP_PATH ./ 2>&1" 2>&1 || logmsg "---- ERROR creating archive of backup!"
				fi
			fi
		)
		# Check if there were errors while running that job
		# (on an error the remaining backup servers for this job are skipped)
		if [ $? -ne 0 ]; then
			logmsg "---- ERROR while running backup job!" | tee -a "$JOB_LOG"
			ERRORS=1
			break
		fi
	done
done <<EOF
$JOB_LINES
EOF
# Exit with status 10 if any job reported an error, 0 otherwise
if [ "$ERRORS" != "0" ]; then
	logmsg "There was an error during one or more backup jobs."
	exit 10
else
	exit 0
fi
# vim:ts=4:sw=4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment