Skip to content

Instantly share code, notes, and snippets.

@ferthalangur
Created December 5, 2015 20:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ferthalangur/07e44402bce1baafe924 to your computer and use it in GitHub Desktop.
Save ferthalangur/07e44402bce1baafe924 to your computer and use it in GitHub Desktop.
Backup GitHub Repositories of interest to a local disk drive
#!/bin/bash
#############################################################
# sync_github_repos
#
# Very simple Bash script to run from cron that will synchronize
# a repository from github.com to our NFS backup disk
# (automounted here as /nfs/netbackups/gitbackups). This will
# also pull the wiki repository, if there is one. We can't pull
# the issues with this script because the GitHub API requires
# a username and password, and we prefer using SSH key authentication.
#
# Configurable Parameters:
#
# BACKUPDIR: The full path of where to create or pull the
# repository (based on the name)
#
# REPOSITORIES: A whitespace-separated list of names relative
# to github.com to be created and/or pulled when the script
# runs
#
# Assumptions:
# o The user running the script has full write permissions in
# $BACKUPDIR
# o SSH for the user running the script has been configured
# to run unattended ... such as a password-less SSH key
# for the user running this script.
# o The SSH key mentioned above has been registered with a
# GitHub user who has read access to any private repo
# that you want to back up.
# o You set up git correctly for this user to access the repos.
#
#
# Todo:
# - Concurrency protection -- create/check for lockfile
# - Move REPOSITORIES to an external configuration file
# - Parse the messages returned and hide the error messages
# from non-existent Wikis
#
#
# Credits:
# - https://gist.github.com/rodw/3073987 for refinements and some
# Bash KungFu for nicer scripting.
#############################################################################
#
# Author: Rob Jenson - https://github.com/ferthalangur
#
# Copyright 2015 Center for Hellenic Studies, Trustees for Harvard University
# http://chs.harvard.edu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#############################################################################
# Every setting below uses the ${VAR-default} form: the default applies only
# when the variable is unset, so any of them can be overridden from the
# environment (an explicitly empty value is honored as-is).

# Top-level backup directory; each repository is stored beneath it as
# ${BACKUPDIR}/<owner>/<repository>/
BACKUPDIR=${BACKUPDIR-"/nfs/netbackups/gitbackups/github.com"}

# Repositories to be backed up.
# Place the name of each repository on a line by itself.
# The last line should just be a single quotation mark (').
# For example: https://github.com/ferthalangur/spotch should be
# added as ferthalangur/spotch. I'm not going to assume that
# everything we care about will be under the CtrHellenicStudies
# top level.
REPOSITORIES='
CtrHellenicStudies/AD_Tools
CtrHellenicStudies/AHCIP_Common
CtrHellenicStudies/AHCIP_data-migration
CtrHellenicStudies/AHCIP_Editor
CtrHellenicStudies/AHCIP_Text_Server
CtrHellenicStudies/Bonifazi_Particle_Database
CtrHellenicStudies/CHSWeb
CtrHellenicStudies/AHomerCommentaryInProgress
CtrHellenicStudies/Annotations_at_CHS_Website
CtrHellenicStudies/IEJ
CtrHellenicStudies/OpenSeaDragonAnnotation
CtrHellenicStudies/OpenVideoAnnotation
CtrHellenicStudies/Plato_Similes
annotationsatharvard/catcha
annotationframework/AfPersistence
annotationframework/AfShared
annotationframework/AfSecurity
annotationframework/AfUsers
kenchreai/kaa-json
'

# Number of copies of each backup file to keep (set to 0 to never delete)
KEEPCOUNT=${KEEPCOUNT-5}

# Git server hostname. Change this if you are working from a different GitHub
# (it is used to build the git@<host>: clone URLs).
GITHOST=${GITHOST-"github.com"}

# Change to true to only show error messages.
# The value is executed as a command, so it must be "true" or "false".
SILENT=${SILENT-"false"}

# Change this to "" if you want git to be less verbose
GIT_VERBOSE=${GIT_VERBOSE-"--verbose"}

# Change this to "--quiet" if you want git to be quiet
# (note: git clone has no "--silent" option)
GIT_QUIET=${GIT_QUIET-}

# In case you want to add other clone options from the command line
GIT_CLONE_OPTS=${GIT_CLONE_OPTS-""}

####################################
# No configuration below this line #
####################################

# Timestamp embedded in every archive name created by this run (minute
# resolution, e.g. 20151205-2018).
TSTAMP=$(date "+%Y%m%d-%H%M")

# The clone command is kept as one flat string on purpose: it is expanded
# unquoted where it is used so that it word-splits into the command plus its
# options (empty option variables simply vanish).
GIT_CLONE_CMD="git clone --mirror ${GIT_VERBOSE} ${GIT_QUIET} ${GIT_CLONE_OPTS} "

# SSH-style URL prefix; "<owner>/<repository>.git" is appended to it.
GIT_USER_PREFIX="git@${GITHOST}:"

# Number of non-fatal failures recorded by check_warn during this run.
ERRORCOUNT=0
#######################################
# Run a command and abort the whole script if it fails.
# Arguments: the command to run, followed by its arguments.
# Outputs:   on failure, a description of the failed command on stderr.
# Returns:   0 when the command succeeds; on failure it does not return --
#            the script exits with the command's exit status.
#######################################
function check_fatal {
  # Keep the status local so this helper never clobbers a caller's variable.
  local status=0
  "$@" || status=$?
  if [[ ${status} -ne 0 ]]; then
    echo "ERROR: Encountered error (${status}) while running the following:" >&2
    # "$*" joins the arguments into a single word; the printed text is the
    # same as before, without mixing a string and an array in one word.
    echo " $*" >&2
    # BASH_LINENO[0] is the line from which check_fatal was called.
    echo " (at line ${BASH_LINENO[0]} of file $0.)" >&2
    echo " Aborting." >&2
    exit "${status}"
  fi
}
#######################################
# Run a command; if it fails, report it and bump the error counter, but do
# not exit. Most errors are not worth killing the whole script over.
# Globals:   ERRORCOUNT (incremented by one on failure; treated as 0 if unset)
# Arguments: the command to run, followed by its arguments.
# Outputs:   on failure, a description of the failed command on stderr.
# Returns:   the exit status of the command that was run.
#######################################
function check_warn {
  # Keep the status local so this helper never clobbers a caller's variable.
  local status=0
  "$@" || status=$?
  if [[ ${status} -ne 0 ]]; then
    echo "ERROR: Encountered error (${status}) while running the following:" >&2
    # "$*" joins the arguments into a single word; the printed text is the
    # same as before, without mixing a string and an array in one word.
    echo " $*" >&2
    # BASH_LINENO[0] is the line from which check_warn was called.
    echo " (at line ${BASH_LINENO[0]} of file $0.)" >&2
    # Shell arithmetic instead of expr: no subprocess, and an unset counter
    # starts from 0 instead of becoming empty.
    ERRORCOUNT=$(( ${ERRORCOUNT:-0} + 1 ))
  fi
  return "${status}"
}
#######################################
# Create a gzipped tar archive of a file or directory, then remove the
# original. The archive is written next to the original as <path>.tar.gz and
# stores the entry under its base name only (tar runs with -C <parent>).
# Arguments: $1 - full path of the file or directory to archive
# Outputs:   failures are reported on stderr by check_warn.
# Returns:   0 when both the archive and the removal succeed; otherwise the
#            status of the step that failed. The original is only removed
#            if the archive was created successfully.
#######################################
function tgz {
  local fullpath=$1
  local target parent

  # Refuse an empty path outright rather than handing it to tar and rm -rf.
  if [[ -z "${fullpath}" ]]; then
    echo "ERROR: tgz called without a path to archive." >&2
    return 1
  fi

  target=$(basename -- "${fullpath}")
  parent=$(dirname -- "${fullpath}")

  # Everything is quoted so paths containing whitespace or glob characters
  # are passed through as single arguments.
  check_warn tar zcf "${fullpath}.tar.gz" -C "${parent}" "${target}" \
    && check_warn rm -rf -- "${fullpath}"
}
#######################################
# Prune old backups: among the entries in a directory whose names start with
# a given prefix, keep the KEEPCOUNT most recently modified and delete the
# rest.
# Globals:   KEEPCOUNT (read) - number of entries to keep; 0 disables pruning
# Arguments: $1 - directory holding the backups
#            $2 - file name prefix selecting the entries to rotate
# Outputs:   problems are reported on stderr.
# Returns:   0 on success or when there is nothing to do; non-zero when the
#            arguments are unusable or a deletion fails.
#######################################
function rotate_file {
  local dir=$1
  local prefix=$2
  local keep

  if [[ ! "${KEEPCOUNT:-0}" =~ ^[0-9]+$ ]]; then
    echo "WARNING: KEEPCOUNT (${KEEPCOUNT}) is not a number; skipping rotation." >&2
    return 0
  fi
  # Force base 10 so a value such as "08" is not rejected as bad octal.
  keep=$(( 10#${KEEPCOUNT:-0} ))
  (( keep > 0 )) || return 0

  # Never fall through to deleting in the caller's current directory: the
  # target directory must exist, and an empty prefix would match everything.
  if [[ -z "${prefix}" || ! -d "${dir}" ]]; then
    echo "ERROR: rotate_file: cannot rotate '${prefix}' in '${dir}'." >&2
    return 1
  fi

  # The subshell confines the directory change and the nullglob setting.
  (
    cd -- "${dir}" >/dev/null || exit 1
    shopt -s nullglob
    candidates=( "${prefix}"* )
    (( ${#candidates[@]} > keep )) || exit 0

    # Insertion sort, newest first, using the shell's own -nt test. This
    # avoids parsing ls output, so any file name is handled safely. Entries
    # with equal timestamps keep their glob (alphabetical) order.
    newest_first=()
    for candidate in "${candidates[@]}"; do
      placed=0
      for i in "${!newest_first[@]}"; do
        if [[ "${candidate}" -nt "${newest_first[i]}" ]]; then
          newest_first=(
            "${newest_first[@]:0:i}"
            "${candidate}"
            "${newest_first[@]:i}"
          )
          placed=1
          break
        fi
      done
      (( placed )) || newest_first+=( "${candidate}" )
    done

    # Everything after the first ${keep} entries is surplus.
    rm -f -- "${newest_first[@]:keep}"
  )
}
#############################################################################
# Main flow: for every entry in REPOSITORIES, mirror-clone the repository
# (and its wiki, if any) into ${BACKUPDIR}/<owner>/<repository>/, compress
# each clone into a timestamped .tar.gz, prune old archives, and finally
# print a summary unless SILENT is true.
#
# ${SILENT} is deliberately executed as a command (`true` or `false`), which
# is why it appears unquoted in the conditions below.
#############################################################################
if ! ${SILENT}; then
  echo ""
  echo "=== INTIALIZING ==="
  echo ""
fi

# Create the backup root BEFORE measuring it: on a first run the directory
# does not exist yet, so `du` would fail and leave BEFORESIZE empty.
check_fatal mkdir -p "${BACKUPDIR}"

# No point gathering statistics if we won't show them
if ! ${SILENT}; then
  # NOTE(review): STARTTIME/FINISHTIME are recorded but not reported anywhere
  # in this script.
  STARTTIME=$(date "+%s")
  BEFORESIZE=$(du -sm "${BACKUPDIR}" | awk '{print($1);}')
  COUNTER=0
fi

if ! ${SILENT}; then
  echo ""
  echo "=== BACKUPS BEGIN ==="
  echo ""
fi

# REPOSITORIES is a whitespace-separated list, so it is expanded unquoted on
# purpose to split it into one word per repository.
# shellcheck disable=SC2086
for repo in ${REPOSITORIES}; do
  targetdir="${BACKUPDIR}/${repo}"
  check_fatal mkdir -p "${targetdir}"
  fileprefix=$(basename "${repo}")

  if ! ${SILENT}; then
    echo "Backing up ${GITHOST}/${repo} to ${targetdir}"
  fi
  # GIT_CLONE_CMD is a flat "command plus options" string and must word-split;
  # every path argument is quoted. Archive and rotate only after a good clone.
  # shellcheck disable=SC2086
  check_warn ${GIT_CLONE_CMD} "${GIT_USER_PREFIX}${repo}.git" \
      "${targetdir}/${fileprefix}.repo-${TSTAMP}.git" \
    && tgz "${targetdir}/${fileprefix}.repo-${TSTAMP}.git" \
    && rotate_file "${targetdir}" "${fileprefix}.repo"

  if ! ${SILENT}; then
    echo "Backing up ${GITHOST}/${repo}.wiki (if any)"
  fi
  # Note -- the wiki clone is not run under check_warn/check_fatal: a
  # repository without a wiki is normal and must neither abort the script nor
  # count as an error.
  # shellcheck disable=SC2086
  ${GIT_CLONE_CMD} "${GIT_USER_PREFIX}${repo}.wiki.git" \
      "${targetdir}/${fileprefix}.wiki-${TSTAMP}.git" \
    && tgz "${targetdir}/${fileprefix}.wiki-${TSTAMP}.git" \
    && rotate_file "${targetdir}" "${fileprefix}.wiki"

  # Counts every repository processed, whether or not its clone succeeded;
  # failures are tallied separately in ERRORCOUNT.
  if ! ${SILENT}; then
    COUNTER=$(( COUNTER + 1 ))
  fi
done

if ! ${SILENT}; then
  echo ""
  echo "=== BACKUPS COMPLETE ==="
  echo ""
fi

if ! ${SILENT}; then
  FINISHTIME=$(date "+%s")
  AFTERSIZE=$(du -sm "${BACKUPDIR}" | awk '{print($1);}')
  # Yes, I assume that the backup will never shrink!
  # A size that could not be measured counts as 0 so the arithmetic below
  # cannot fail.
  SIZECHANGE=$(( ${AFTERSIZE:-0} - ${BEFORESIZE:-0} ))
  echo ""
  echo "Backed up ${COUNTER} Repositories from ${GITHOST}."
  echo "Total backup space in use is ${AFTERSIZE} MB ... an increase of ${SIZECHANGE} MB"
  if [[ ${ERRORCOUNT} -gt 0 ]]; then
    echo "WARNING: There were ${ERRORCOUNT} errors during execution."
  fi
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment