Skip to content

Instantly share code, notes, and snippets.

@qpfiffer
Created December 3, 2013 22:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qpfiffer/7778797 to your computer and use it in GitHub Desktop.
Save qpfiffer/7778797 to your computer and use it in GitHub Desktop.
@nijotz backup log purging script
#!/usr/bin/env python
import calendar
import os
import re
import time
from datetime import datetime
def in_purge_window(base, target, now=None):
    '''
    Return True if the target date lies within the purge window of base.

    The purge window is 1/10th the age of the base date, further back in time:
    it reaches from base back to base - 0.1 * (now - base).

    base, target and now are numeric timestamps in the same units as
    time.time().  now defaults to the current time, read when the function is
    called.  The arguments must satisfy now > base > target; this is checked
    with an assert.
    '''
    if now is None:
        # Resolve at call time.  A time.time() default in the signature is
        # evaluated only once, when the function is defined.
        now = time.time()
    assert now > base > target
    return target > (now - 1.1 * (now - base))
def get_purgeables(items, now=None):
    '''
    Return the items that should be removed.

    This function checks the purge window for each item and makes sure that
    there is only one item in that window and that it is the oldest.  Items to
    be removed are stored in a list and returned.

    items is a sequence of timestamps ordered newest to oldest (the window
    check asserts now > newer > older).  now defaults to the current time,
    read once per call so every comparison uses the same reference point.
    '''
    if now is None:
        # A time.time() default in the signature would be evaluated only
        # once, at definition time.
        now = time.time()

    remove = []
    current = 0
    # The -2 is to make sure we never remove the last item
    while current < len(items) - 2:
        # Look two items ahead of current...
        keeper = current + 2
        purgeable = current + 1
        # If the keeper is within the purge window, then so is the purgeable.
        # We want to keep the older of the two (keeper), so add the purgeable
        # to the list of items to remove.
        while keeper < len(items) and in_purge_window(items[current], items[keeper], now):
            # Remove the item directly before the one that is in the purge
            # window
            remove.append(items[purgeable])
            keeper += 1
            purgeable += 1
        # Out of the loop, keeper is out of the purge window (or past the end
        # of the list), so the item just before it is the last one kept and
        # becomes the next base.
        current = keeper - 1
    return remove
# Timestamp layouts recognised in file names, most specific first.  Compiled
# once at import time; raw strings keep the \d escapes intact.
_DATETIME_PATTERNS = (
    re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})-(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})'),
    re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{1,2}):(?P<minute>\d{2}):(?P<second>\d{2})'),
    re.compile(r'(?P<year>\d{4})_(?P<month>\d{2})_(?P<day>\d{2})_(?P<hour>\d{2})_(?P<minute>\d{2})_(?P<second>\d{2})'),
    re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})'),
    re.compile(r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})'),
)


def find_datetime(string):
    '''
    Find a date (and optionally a time) embedded in string.

    The known layouts are tried in order and the first one that yields a
    valid calendar date wins.  Returns the matched moment as an integer
    number of seconds since the epoch, as computed by calendar.timegm, or
    None when the string holds no usable date.
    '''
    for regex in _DATETIME_PATTERNS:
        match = regex.search(string)
        if not match:
            continue
        # Convert the strings to ints.  The group names are exactly the
        # keyword arguments datetime() accepts; hour/minute/second are only
        # present for some layouts and default to 0 otherwise.
        datedata = {k: int(v) for k, v in match.groupdict().items()}
        try:
            dt = datetime(**datedata)
        except ValueError:
            # Digits that merely look like a date (month 13, day 99, ...):
            # not a usable match, so try the remaining layouts.
            continue
        return calendar.timegm(dt.timetuple())
    return None
def main():
    '''
    Command-line entry point.

    Parses the options, looks for a date in the name of every file in the
    given directory, prints a table of the dated files with a '*' next to the
    ones selected for removal, and then deletes those files.  With -n nothing
    is deleted; with -y/-f files are deleted without asking; otherwise the
    user is prompted and anything not starting with 'y' means no.
    '''
    import argparse

    parser = argparse.ArgumentParser(description='Purges logs in the specified directory.')
    parser.add_argument("-d", "--directory", nargs=1, action="store",
                        help='Specifies the directory to work in.', type=str)
    parser.add_argument("-n", "--noop", action="store_const", const=True,
                        help='No operation. Does not apply any changes.',
                        default=False)
    # -y for "yes" -f because it mimics rm's behavior
    parser.add_argument("-y", "-f", action="store_const", const=True,
                        help='Does not prompt for deletion.',
                        default=False)
    args = parser.parse_args()

    if args.directory is None:
        parser.print_help()
        parser.exit()
    if args.noop and args.y:
        print("Cannot have -f and -n at the same time. Try again.")
        parser.exit()

    # nargs=1 makes argparse hand back a one-element list
    directory = args.directory[0]

    # Find dates in the filenames of all the files in the given directory,
    # ignoring files with incomprehensible dates.  Files that share a
    # timestamp overwrite each other here, so only one of them is tracked.
    dates_files = {}
    for name in os.listdir(directory):
        stamp = find_datetime(name)
        if stamp is not None:
            dates_files[stamp] = name

    # Newest to oldest:
    dates = sorted(dates_files, reverse=True)

    # Get the dates that need removing
    remove = get_purgeables(dates)
    doomed = set(remove)  # constant-time membership test for the table below

    # Print out which ones will be removed
    for d in dates:
        stamp_text = time.strftime('%Y %b %d %H:%M:%S', time.gmtime(d))
        marker = '*' if d in doomed else ''
        print(' '.join([stamp_text.rjust(23), ' | ',
                        dates_files[d].rjust(38), ' | ', marker]))

    if args.noop:
        # No operation
        proceed = False
    elif args.y:
        # Force operation, do not prompt for input
        proceed = True
    else:
        # Neither was specified: ask.  raw_input only exists on Python 2.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        answer = read_line('Continue [y/N]')
        # Slicing instead of indexing so that a bare Enter means "no"
        # rather than raising IndexError.
        proceed = answer[:1].lower() == 'y'

    if proceed:
        for r in remove:
            os.remove(os.path.join(directory, dates_files[r]))
# Run the purge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment