Skip to content

Instantly share code, notes, and snippets.

@simpleadm
Created February 15, 2018 12:59
Show Gist options
  • Save simpleadm/506d539f664022f98230f0398f60cbd4 to your computer and use it in GitHub Desktop.
Save simpleadm/506d539f664022f98230f0398f60cbd4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
# copyright: 2011, Igor Katson, igor.katson@gmail.com
"""What this script does, is logartihmically keep files, that means,
when you provide a dir, or a file pattern to it, it can calculate,
which files to keep based on the following parameters:
- keep 1 file each day for --days days,
- keep 1 file each week for --weeks weeks (after --days processing)
- keep 1 file each month for --months months (after --weeks processing)
Use ./rotater --help for help
"""
import os
import datetime
import sys
import re
import optparse
import logging
import types
import urlparse
import ConfigParser
from django.conf import settings
settings.configure()
from django.core.exceptions import ImproperlyConfigured
from django.core.files.storage import FileSystemStorage, Storage
log = logging.getLogger('rotater.py')

# Compiled patterns used to pull a (year, month, day) triple out of a file
# name. Each pattern has exactly three groups and only recognises years that
# start with "20".
DATE_RE = tuple(
    re.compile(pattern)
    for pattern in (
        r'(20\d{2})-(\d{2})-(\d{2})',  # dashed form, e.g. 2018-02-15
        r'(20\d{2})(\d{2})(\d{2})',  # compact form, e.g. 20180215
    )
)
class WalkingStorageMixin(object):
    """Mixin that adds an ``os.walk()``-style traversal to a storage class.

    The class it is mixed into must provide ``listdir(path)`` returning a
    ``(directories, files)`` pair, because ``walk()`` unpacks it that way.
    """

    def __init__(self, *args, **kwargs):
        """Remember the default walk root and initialise the wrapped storage.

        ``walk_top`` (keyword only, default ``''``) is removed from the
        keyword arguments before they are forwarded to the next class in
        the MRO, so the underlying storage never receives it.
        """
        self.walk_top = kwargs.pop('walk_top', '')
        super(WalkingStorageMixin, self).__init__(*args, **kwargs)

    def walk(self, top=None, topdown=True, onerror=None):
        """Yield ``(dirpath, dirnames, filenames)`` like ``os.walk()``.

        Directory listings come from ``self.listdir()`` instead of the
        local filesystem.

        :param top: directory to start from; a falsy value means
            ``self.walk_top``.
        :param topdown: when true a directory is yielded before its
            subdirectories, otherwise after them.
        :param onerror: optional callable invoked with the ``os.error``
            raised by ``listdir``; the failing directory is then skipped.
        """
        top = top or self.walk_top
        try:
            dirs, nondirs = self.listdir(top)
        except os.error as err:
            if onerror is not None:
                onerror(err)
            return
        if topdown:
            yield top, dirs, nondirs
        for name in dirs:
            new_path = os.path.join(top, name)
            # Forward topdown/onerror so that nested levels honour the
            # caller's traversal order and error callback as well.
            for entry in self.walk(new_path, topdown, onerror):
                yield entry
        if not topdown:
            yield top, dirs, nondirs
class WalkingFileStorage(WalkingStorageMixin, FileSystemStorage):
    """FileSystemStorage combined with WalkingStorageMixin's ``walk()``."""
# S3 support is optional: when the s3boto backend cannot be imported both
# names are bound to None so the module still loads.
try:
    from storages.backends.s3boto import S3BotoStorage
except ImportError:
    S3BotoStorage = None
    WalkingS3Storage = None
else:
    class WalkingS3Storage(WalkingStorageMixin, S3BotoStorage):
        """S3BotoStorage combined with WalkingStorageMixin's ``walk()``."""
def get_storage_by_path(path, **options):
    """Return the walking storage backend that matches ``path``.

    :param path: a local directory, or an S3 URL of the form
        ``s3://BUCKET[/DIRNAME]``.
    :param options: extra keyword options; only ``amazon_access_key`` and
        ``amazon_secret_key`` are read here, the rest are ignored.
    :returns: a ``WalkingS3Storage`` for ``s3://`` paths (with ``walk_top``
        set to the part after the bucket), otherwise a
        ``WalkingFileStorage`` for ``path``.
    :raises ValueError: if the S3 URL has no bucket name or no AWS
        credentials can be found.
    :raises ImproperlyConfigured: if an S3 URL is given but the optional
        s3boto backend could not be imported.
    """
    def get_amazon_auth(options):
        """Return ``(access_key, secret_key)`` from the first source that
        provides both: the options, the environment, then ``~/.s3cfg``."""
        # Try to get keys from options.
        key = options.get('amazon_access_key')
        secret = options.get('amazon_secret_key')
        if key and secret:
            return key, secret
        # Try to get keys from environment.
        key = os.environ.get('AWS_ACCESS_KEY_ID')
        secret = os.environ.get('AWS_SECRET_ACCESS_KEY')
        if key and secret:
            return key, secret
        # Try to get keys from ~/.s3cfg, the file used by s3cmd.
        s3cfg = os.path.expanduser('~/.s3cfg')
        if os.path.exists(s3cfg):
            parser = ConfigParser.ConfigParser()
            try:
                parser.read([s3cfg])
                key = parser.get('default', 'access_key')
                secret = parser.get('default', 'secret_key')
            except ConfigParser.Error as err:
                # A malformed file or a missing section/option must end in
                # the ValueError below, not in a ConfigParser traceback.
                log.warning(
                    'Could not read AWS credentials from %s: %s', s3cfg, err)
                key = secret = None
            if key and secret:
                return key, secret
        raise ValueError('AWS access credentials not provided.')

    if not path.startswith('s3://'):
        return WalkingFileStorage(path)

    parsed = urlparse.urlparse(path)
    if not parsed.netloc:
        raise ValueError(
            'You should provide at least a bucket name, e.g. s3://BUCKET/')
    if WalkingS3Storage is None:
        # The module binds WalkingS3Storage to None when the s3boto import
        # fails; calling it would raise an unhelpful TypeError.
        raise ImproperlyConfigured(
            'S3 paths require the "storages.backends.s3boto" backend, '
            'which could not be imported.')
    access_key, secret_key = get_amazon_auth(options)
    return WalkingS3Storage(
        bucket=parsed.netloc,
        # Drop the leading "/" of the URL path (a no-op on an empty path).
        walk_top=parsed.path[1:],
        access_key=access_key,
        secret_key=secret_key,
    )
class BaseRotater(object):
    """Base class for rotaters; subclasses must override files_to_delete().

    :param storage: a Django ``Storage`` instance; ``walk()``, ``delete()``
        and ``modified_time()`` are called on it by the methods below.
    :param regex: optional pattern (string or compiled) that the full path
        of a file must match for the file to be considered.
    :param kwargs: ``recurse`` (default False) controls descending into
        subdirectories; everything else is kept in ``self.options``.
    """

    def __init__(self, storage, regex=None, **kwargs):
        assert isinstance(storage, Storage)
        self.storage = storage
        self.regex = regex
        if isinstance(self.regex, basestring):
            self.regex = re.compile(self.regex)
        self.recurse = kwargs.pop('recurse', False)
        self.options = kwargs

    def walk(self):
        """Walk through all files and dirs that are candidates for removal.

        Yields ``(dirpath, dirnames, filenames)``; when a regex is set,
        ``filenames`` only holds names whose joined path matches it.
        Without ``recurse`` only the first directory is yielded.
        """
        for dirpath, dirs, files in self.storage.walk():
            if self.regex:
                files = [name for name in files
                         if self.regex.match(os.path.join(dirpath, name))]
            yield dirpath, dirs, files
            if not self.recurse:
                # A plain return ends the generator; raising StopIteration
                # inside a generator is a RuntimeError under PEP 479.
                return

    def files_to_delete(self):
        """Return an iterable of the file paths to be deleted."""
        raise NotImplementedError

    def files_to_keep(self):
        """Yield every walked file path that files_to_delete() omits."""
        delete = set(self.files_to_delete())
        for dirpath, _dirs, files in self.walk():
            for name in files:
                path = os.path.join(dirpath, name)
                if path not in delete:
                    yield path

    def rotate(self):
        """Log and delete every file reported by files_to_delete()."""
        for path in self.files_to_delete():
            log.info('Deleting %s', path)
            self.storage.delete(path)

    def _get_mtime(self, filename):
        """Return the file's date as a ``datetime``.

        Unless the ``date_from_filename`` option is false, the first
        DATE_RE pattern that yields a valid calendar date in ``filename``
        wins (time set to midnight). Otherwise the storage's
        ``modified_time()`` is returned.
        """
        if self.options.get('date_from_filename', True):
            for pattern in DATE_RE:
                match = pattern.search(filename)
                if not match:
                    continue
                year, month, day = match.groups()
                try:
                    return datetime.datetime(int(year), int(month), int(day))
                except ValueError:
                    # The digits are not a real date; try the next pattern.
                    continue
        return self.storage.modified_time(filename)
class LogarithmicRotater(BaseRotater):
    """Rotater that thins files out the older they are.

    One file per day is kept for ``days`` days, then one per week for
    ``weeks`` weeks, then one per month for ``months`` months (a month is
    counted as 30 days); anything older is deleted.
    """

    DEFAULT_DAYS = 14
    DEFAULT_WEEKS = 12
    DEFAULT_MONTHS = 36

    def __init__(self, *args, **kwargs):
        """Pull ``days``/``weeks``/``months`` out of the keyword arguments
        (a missing or None value means the class default) and hand the
        rest to BaseRotater."""
        for name, fallback in (
                ('days', self.DEFAULT_DAYS),
                ('weeks', self.DEFAULT_WEEKS),
                ('months', self.DEFAULT_MONTHS)):
            value = kwargs.pop(name, None)
            setattr(self, name, fallback if value is None else value)
        super(LogarithmicRotater, self).__init__(*args, **kwargs)

    def _logarithmic_rotate(self, files):
        """Yield the names of the files that can be deleted.

        ``files`` is an iterable of ``(mtime, filename)`` 2-tuples, where
        ``mtime`` has a ``date()`` method.
        - keep 1 file each day for self.days days,
        - keep 1 file each week for self.weeks weeks (after the days),
        - keep 1 file each month for self.months months (after the weeks).
        Within a bucket, the first file seen is the one that is kept.
        """
        today = datetime.date.today()
        daily_floor = today - datetime.timedelta(days=self.days)
        weekly_floor = daily_floor - datetime.timedelta(weeks=self.weeks)
        monthly_floor = weekly_floor - datetime.timedelta(
            days=self.months * 30)
        # One set holds the bucket keys of all three tiers.
        seen_buckets = set()
        for mtime, name in files:
            day = mtime.date()
            if day <= monthly_floor:
                # Older than the whole retention window.
                yield name
                continue
            if day <= weekly_floor:
                # Monthly tier: the bucket is the first day of the month.
                bucket = day.replace(day=1)
            elif day <= daily_floor:
                # Weekly tier: the bucket is the Monday of that week.
                bucket = day - datetime.timedelta(days=day.weekday())
            elif day <= today:
                # Daily tier: the bucket is the day itself.
                bucket = day
            else:
                # The file seems to be from the future, keep it.
                continue
            if bucket in seen_buckets:
                yield name
            else:
                seen_buckets.add(bucket)

    def files_to_delete(self):
        """Yield the deletable paths, rotating each walked directory on
        its own."""
        for dirpath, _dirs, names in self.walk():
            paths = [os.path.join(dirpath, name) for name in names]
            stamped = ((self._get_mtime(path), path) for path in paths)
            for doomed in self._logarithmic_rotate(stamped):
                yield doomed
def main(argv=None):
    """Command-line entry point: parse the options and rotate the files.

    :param argv: argument list without the program name; None makes
        optparse read ``sys.argv[1:]``.

    Exits with status 1 when the number of positional arguments is not
    exactly one, and through ``OptionParser.error()`` when ``--loglevel``
    is not a known level name.
    """
    optparser = optparse.OptionParser(
        usage="""usage: %prog [options] path
"path" may be a path to a directory, or an s3 URL e.g.
s3://BUCKET[/DIRNAME]""")
    optparser.add_option(
        '--regex', help='Optional regex to match filenames for.'
        ' The full path will be matched.')
    optparser.add_option(
        '--days', type='int',
        help='keep 1 file each day for --days days [%default]',
        default=LogarithmicRotater.DEFAULT_DAYS)
    optparser.add_option(
        '--weeks', type='int',
        help='keep 1 file each week for --weeks weeks [%default]',
        default=LogarithmicRotater.DEFAULT_WEEKS)
    optparser.add_option(
        '--months', type='int',
        help='keep 1 file each month for --months months [%default]',
        default=LogarithmicRotater.DEFAULT_MONTHS)
    optparser.add_option(
        '--no-date-from-filename', action='store_false',
        help='do not try to guess modification time from filename [%default]',
        default=True, dest='date_from_filename')
    optparser.add_option(
        '--test', '--test-delete', action='store_true', default=False,
        dest='test_delete',
        help='test mode, no files will be deleted, the ones to '
        'DELETE will be printed to stdout [%default]')
    optparser.add_option(
        '--test-keep', action='store_true', default=False,
        help='test mode, no files will be deleted, the ones to '
        'KEEP will be printed to stdout [%default]')
    optparser.add_option(
        '--recurse', action='store_true', default=False,
        help='Recurse into subdirectories [%default]',
    )
    optparser.add_option(
        '--loglevel',
        default='DEBUG',
        help='One of DEBUG, WARNING, INFO, ERROR [%default]')
    optparser.add_option('--amazon_access_key', type='string')
    optparser.add_option('--amazon_secret_key', type='string')
    options, args = optparser.parse_args(argv)
    if len(args) != 1:
        sys.stderr.write(
            '%s takes only 1 argument\nAborting...\n' % sys.argv[0])
        sys.exit(1)
    target = args[0]

    loglevels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
    }
    try:
        loglevel = loglevels[options.loglevel.lower()]
    except KeyError:
        # Report a usage error instead of dying with a KeyError traceback.
        optparser.error(
            'invalid --loglevel %r, expected one of DEBUG, WARNING, INFO, '
            'ERROR' % options.loglevel)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    log.addHandler(handler)
    log.setLevel(loglevel)

    # Every parsed option is forwarded as a keyword argument; each callee
    # takes the ones it knows from **kwargs.
    settings_kwargs = vars(options)
    storage = get_storage_by_path(target, **settings_kwargs)
    rotater = LogarithmicRotater(storage, **settings_kwargs)
    if options.test_delete:
        for path in sorted(rotater.files_to_delete()):
            log.info('will delete "%s"', path)
    elif options.test_keep:
        for path in sorted(rotater.files_to_keep()):
            log.info('will keep "%s"', path)
    else:
        rotater.rotate()


if __name__ == '__main__':
    main()
@simpleadm
Copy link
Author

Usage: rotater.py --days 21 --weeks 24 --months 36 /backups/db

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment