Created
December 3, 2013 22:32
-
-
Save qpfiffer/7778797 to your computer and use it in GitHub Desktop.
@nijotz backup log purging script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import calendar | |
import os | |
import re | |
import time | |
from datetime import datetime | |
def in_purge_window(base, target, now=None):
    '''
    Report whether ``target`` lies inside the purge window of ``base``.

    The purge window starts at ``base`` and reaches further back in time by
    1/10th of the age of ``base`` (``now - base``).

    base   -- timestamp of the newer item the window is measured from
    target -- timestamp of the older item being tested
    now    -- reference timestamp; defaults to the current time, looked up
              on every call

    Returns True if ``target`` is newer than the old edge of the window.
    Raises AssertionError unless ``now > base > target``.
    '''
    # Resolve the default here: a ``time.time()`` default in the signature is
    # evaluated once, when the function is defined, and then never changes.
    if now is None:
        now = time.time()
    assert now > base > target
    return target > (now - 1.1 * (now - base))
def get_purgeables(items, now=None):
    '''
    Pick the items that can be purged so that each purge window keeps only
    its oldest member.

    items -- timestamps ordered newest to oldest
    now   -- reference timestamp handed to in_purge_window; defaults to the
             current time, looked up on every call

    Returns a list of the items to remove, in the order they were found.
    The last item of ``items`` is never part of the result.
    '''
    # Resolve the default here rather than in the signature, where it would
    # be frozen at definition time. A concrete value is always passed on.
    if now is None:
        now = time.time()

    remove = []
    current = 0
    # The -2 is to make sure we never remove the last item
    while current < len(items) - 2:
        # Look two items ahead of current...
        keeper = current + 2
        # If the keeper is within the purge window, then so is the item just
        # before it. We want to keep the older of the two (keeper), so the
        # item before it goes on the removal list.
        while keeper < len(items) and in_purge_window(items[current], items[keeper], now):
            remove.append(items[keeper - 1])
            keeper += 1
        # Out of the loop, keeper is out of the purge window (or past the end
        # of the list), so the item before it is the last one that was kept.
        current = keeper - 1
    return remove
# Timestamp layouts recognised in file names, tried in this order. The
# date-and-time layouts come before the bare-date ones so a full timestamp is
# not cut down to its date part.
_DATETIME_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})-(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})',
    r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{1,2}):(?P<minute>\d{2}):(?P<second>\d{2})',
    r'(?P<year>\d{4})_(?P<month>\d{2})_(?P<day>\d{2})_(?P<hour>\d{2})_(?P<minute>\d{2})_(?P<second>\d{2})',
    r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})',
    r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})',
))


def find_datetime(string):
    '''
    Find the first recognisable date (and optional time) in ``string``.

    Each known layout is searched for in turn. The first one that matches
    and describes a real calendar date wins.

    Returns the timestamp as seconds since the epoch, computed with
    calendar.timegm (the parsed fields are taken as UTC), or None when no
    layout yields a valid date.
    '''
    for regex in _DATETIME_PATTERNS:
        match = regex.search(string)
        if not match:
            continue
        # Convert the captured strings to ints. Only some layouts carry
        # hour/minute/second; datetime defaults the missing ones to zero.
        datedata = {k: int(v) for k, v in match.groupdict().items()}
        try:
            dt = datetime(**datedata)
        except ValueError:
            # The digits fit the layout but are not a real date (for example
            # month 56), so this is not a timestamp; try the next layout.
            continue
        return calendar.timegm(dt.timetuple())
    return None
def main():
    '''
    Command-line entry point.

    Lists the files in the given directory whose names contain a date,
    newest first, marks the ones chosen by get_purgeables with a '*', and
    deletes the marked files once confirmed.

    Options:
      -d/--directory  directory to work in (help is printed if missing)
      -n/--noop       only print the listing, delete nothing
      -y/-f           delete without prompting
    '''
    import argparse

    parser = argparse.ArgumentParser(description='Purges logs in the specified directory.')
    parser.add_argument("-d", "--directory", nargs=1, action="store",
                        help='Specifies the directory to work in.', type=str)
    parser.add_argument("-n", "--noop", action="store_const", const=True,
                        help='No operation. Does not apply any changes.',
                        default=False)
    # -y for "yes" -f because it mimics rm's behavior
    parser.add_argument("-y", "-f", action="store_const", const=True,
                        help='Does not prompt for deletion.',
                        default=False)
    args = parser.parse_args()

    if args.directory is None:
        parser.print_help()
        raise SystemExit()
    if args.noop and args.y:
        print("Cannot have -f and -n at the same time. Try again.")
        raise SystemExit()

    # nargs=1 makes argparse hand back a one-element list.
    directory = args.directory[0]

    # Find dates in the filenames of all the files in the given directory,
    # ignoring files with incomprehensible dates. Files that resolve to the
    # same timestamp share one entry; the one listed last wins.
    dates_files = {}
    for name in os.listdir(directory):
        stamp = find_datetime(name)
        if stamp is not None:
            dates_files[stamp] = name

    # Newest to oldest:
    dates = sorted(dates_files, reverse=True)

    # Get the dates that need removing
    remove = get_purgeables(dates)
    marked = set(remove)

    # Print out which ones will be removed. Each row is built as one string
    # so the single-argument print works the same on Python 2 and 3.
    for d in dates:
        row = [
            time.strftime('%Y %b %d %H:%M:%S', time.gmtime(d)).rjust(23),
            ' | ',
            dates_files[d].rjust(38),
            ' | ',
            '*' if d in marked else '',
        ]
        print(' '.join(row))

    if args.noop:
        # No operation
        cont = False
    elif args.y:
        # Force operation, do not prompt for input
        cont = True
    else:
        # Neither was specified: ask. raw_input only exists on Python 2.
        try:
            ask = raw_input
        except NameError:
            ask = input
        answer = ask('Continue [y/N]')
        # Slice instead of indexing so an empty answer (just Enter) means
        # "no" rather than raising IndexError.
        cont = answer[:1].lower() == 'y'

    if cont:
        for r in remove:
            os.remove(os.path.join(directory, dates_files[r]))
# Run the purge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment