Skip to content

Instantly share code, notes, and snippets.

@pterk
Created July 10, 2012 19:21
Show Gist options
  • Save pterk/3085650 to your computer and use it in GitHub Desktop.
Save pterk/3085650 to your computer and use it in GitHub Desktop.
Small, incremental, size- and bandwidth-efficient backup solution using rsync and hard links
#!/usr/bin/env python
"""Rotating Remote Rsync backup script in python
This code is a reworked Python version of a script by Grenville
Armitage from the Centre for Advanced Internet Architectures,
Swinburne University of Technology, which I found online and which is
ultimately derived from
http://www.mikerubel.org/computers/rsync_snapshots/#Incremental
"""
import logging
import optparse
import os
import shutil
import subprocess
import sys
# Configure the root logger with the default handler before grabbing it.
logging.basicConfig()
logger = logging.getLogger()

# Snapshot levels accepted on the command line and by sanity_checks().
INTERVALS = [
    'hourly',
    'daily',
    'weekly',
]
class InvalidArgumentsException(Exception):
    """Raised when the backup target, count or interval fails validation."""
def sanity_checks(base_target, number, interval):
    """Validate the arguments shared by rotate() and backup().

    base_target -- path prefix of the snapshot directories; the path
                   itself must exist and must not end with a slash
    number      -- number of snapshots to keep, 1 through 23
    interval    -- one of the names listed in INTERVALS

    Raises InvalidArgumentsException describing the first check that
    fails. Returns None when every check passes.
    """
    # The messages must format the *parameter*; the previous code used an
    # undefined local name and only worked when a global of that name
    # happened to exist.
    if not os.path.exists(base_target):
        raise InvalidArgumentsException(
            "{0} does not exist".format(base_target))
    if base_target.endswith('/'):
        raise InvalidArgumentsException(
            "{0} should not end with a slash (/)".format(base_target))
    if number < 1:
        raise InvalidArgumentsException(
            "Need to keep at least 1 backup (got {0})".format(number))
    if number > 23:
        # Snapshots are numbered 0..number, so 23 already means 24 copies.
        raise InvalidArgumentsException("Won't make more than 24 backups")
    if interval not in INTERVALS:
        raise InvalidArgumentsException(
            "Invalid interval: {0}".format(interval))
def rotate(base_target, number, interval):
    """Rotate the snapshot directories of one interval using hard links.

    <base_target> is the path name of the backup directory, without
    trailing "/". Snapshots are named <base_target>.<interval>.<n>,
    e.g. <base_target>.hourly.0, <base_target>.daily.1 or
    <base_target>.weekly.2; a higher <n> is an older snapshot.

    number indicates the number of snapshots we are keeping at this level

    interval selects the level to rotate. For "hourly",
    <base_target>.hourly.0 (created if missing) is hard-link-copied to
    <base_target>.hourly.1. For the other intervals,
    <base_target>.hourly.1 is hard-link-copied to
    <base_target>.<interval>.0.

    Raises InvalidArgumentsException (from sanity_checks) for invalid
    arguments, IOError when a daily/weekly rotation finds no hourly
    snapshot to copy, and subprocess.CalledProcessError when the
    "cp -al" command exits with a non-zero status.
    """
    sanity_checks(base_target, number, interval)
    logger.info("rotating {0} backups for {1}".format(interval, base_target))
    target = "{0}.{1}".format(base_target, interval)

    # `newest` is the index that receives the fresh hard-link copy. The
    # hourly level keeps .0 as the live rsync destination, so its copy
    # lands in .1 instead.
    if interval == "hourly":
        source = "{0}.hourly.0".format(base_target)
        newest = 1
    else:
        source = "{0}.hourly.1".format(base_target)
        newest = 0

    if not os.path.exists(source):
        if interval != "hourly":
            raise IOError(
                "hourly snapshot ({0}) does not exist".format(source))
        os.makedirs(source)

    # Drop the snapshot that is about to fall off the end.
    oldest = "{0}.{1}".format(target, number)
    if os.path.exists(oldest):
        shutil.rmtree(oldest)

    # moving backup.5 to backup.6, backup.4 to backup.5 etc.
    for index in range(number, newest, -1):
        src = "{0}.{1}".format(target, index - 1)
        dest = "{0}.{1}".format(target, index)
        if os.path.exists(src):
            logger.debug("moving {0} to {1}".format(src, dest))
            shutil.move(src, dest)

    dest = "{0}.{1}".format(target, newest)
    logger.info("hardlinking {0} to {1}".format(source, dest))
    # check_call (not call): a failed copy must not go unnoticed.
    subprocess.check_call(['cp', '-al', source, dest])
    logger.info("hardlinking done")
def backup(src, base_target, interval, options):
    """Rotate the snapshots for `interval` and, for hourly runs, rsync.

    src         -- rsync source, passed to rsync unchanged
    base_target -- path prefix of the snapshot directories
    interval    -- one of INTERVALS; only "hourly" triggers an rsync
    options     -- object providing `backups` (number of snapshots to
                   keep) and `rsync_args` (extra rsync arguments as one
                   string, or a false value for none)

    Raises whatever sanity_checks() and rotate() raise, and
    subprocess.CalledProcessError when rsync exits with a non-zero
    status (the failure is logged before the exception propagates).
    """
    # Function-scope import so this block does not depend on the
    # module's import list.
    import shlex

    sanity_checks(base_target, options.backups, interval)
    rotate(base_target, options.backups, interval)
    if interval != 'hourly':
        # We're done here
        return

    dest = "{0}.hourly.0".format(base_target)
    logger.info("start rsync {0} {1}".format(src, dest))
    args = ["rsync", "-a", "--update", "--delete"]
    if options.rsync_args:
        # shlex keeps quoted arguments (e.g. --exclude='a b') intact,
        # which a plain whitespace split would tear apart.
        args.extend(shlex.split(options.rsync_args))
    args.append(src)
    args.append(dest)
    logger.debug("Executing: {0}".format(" ".join(args)))
    # check_call raises on a non-zero exit status instead of returning
    # it, so the failure path has to be an except clause.
    try:
        subprocess.check_call(args)
    except subprocess.CalledProcessError:
        logger.error("rsync {0} {1} failed".format(src, dest))
        raise
    logger.info("completed rsync {0} {1}".format(src, dest))
def usage(parser, exit=0):
    """Print the parser's help text to stdout and terminate the process.

    parser -- object providing format_help(); this script passes its
              optparse.OptionParser
    exit   -- status handed to sys.exit() (default 0)

    Never returns: sys.exit() raises SystemExit.
    """
    # A parenthesised single argument is valid both as the Python 2
    # print statement and as the Python 3 print function.
    print(parser.format_help())
    sys.exit(exit)
if __name__ == '__main__':
    # Command line: [options] <src> <target> <interval>
    parser = optparse.OptionParser(
        usage="%prog [options] <src> <target> <{0}>".format(
            "|".join(INTERVALS)))
    parser.add_option("-l", "--loglevel", dest="loglevel", default="warn",
                      help="name of a logging level (default: warn)")
    parser.add_option("-x", "--rsync-args", dest="rsync_args", default=None,
                      help="extra arguments passed on to rsync")
    parser.add_option("-b", "--backups",
                      action="store", type="int", dest="backups", default=6,
                      help="number of (incremental) backups to make")
    (options, args) = parser.parse_args()

    # Exactly three positional arguments are required.
    if len(args) != 3:
        usage(parser, 1)
    src, target, interval = args
    if interval not in INTERVALS:
        usage(parser, 1)

    # Trailing slashes are tolerated here; sanity_checks() rejects them.
    target = target.rstrip("/")
    if not os.path.exists(target):
        # Stop instead of carrying on with a target that is not there.
        sys.stderr.write("{0} does not exist\n".format(target))
        sys.exit(1)

    # Resolve the level name up front so a typo yields a usage error
    # rather than an AttributeError traceback.
    level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(level, int):
        parser.error("invalid loglevel: {0}".format(options.loglevel))
    logger.setLevel(level)

    try:
        backup(src, target, interval, options)
    except InvalidArgumentsException as exc:
        sys.stderr.write("{0}\n".format(exc))
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment