Last active
November 12, 2018 10:35
-
-
Save esolitos/0a2f9c94bd4a4f86ed1290a908f0b26b to your computer and use it in GitHub Desktop.
UNMAINTAINED - See `ramsalt/cron-scripts`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys, subprocess, re | |
from datetime import datetime, timedelta | |
if __name__ == '__main__': | |
now_date = datetime.today() | |
date_7days_ago = now_date - timedelta(days=7) | |
date_4weeks_ago = now_date - timedelta(weeks=4) | |
bucket_name = 'INSERT_AWS_S3_BUCKET_NAME_HERE' | |
db_bkps = {} | |
file_bkps = {} | |
bkps_in_excess = [] | |
print "\n##### 1. Fetch list of stored backups from S3...\n" | |
proc = subprocess.Popen(['/usr/local/bin/s3cmd', 'ls', 's3://'+bucket_name+'/'], stdout=subprocess.PIPE) | |
(out, err) = proc.communicate() | |
# print out | |
# print err | |
## Debug! | |
# f = open('out.txt', 'w') | |
# f.write(out) | |
# f.close() | |
print "\n##### 2. Match and index all items...\n" | |
for line in out.split('\n'): | |
# for line in open('out.txt', 'r').read().split('\n'): | |
backup = line.split() | |
try: | |
re_s3_prefix = '(?:' + re.escape('s3://'+bucket_name+'/') + ')' | |
re_grp_site = '(?P<site>[a-z0-9\-]+_[a-z]+)' | |
re_grp_datetime = '(?P<date>(?:[\d\-]+){3}T(?:[\d\-]+){3})' | |
re_grp_hash = '(?P<hash>[a-z0-9]+)' | |
re_grp_bktype = '(?P<bktype>files|db)' | |
b_info_pattern = '^'+re_s3_prefix+re_grp_site+'_'+re_grp_datetime+'_'+re_grp_hash+'_'+re_grp_bktype+'.*(?:\.(tar|sql)(\.gz)?)$' | |
match = re.match(b_info_pattern, backup[3], flags=re.IGNORECASE) | |
if match is not None: | |
# print match.groupdict() | |
info = match.groupdict() | |
bkp_type = info['bktype']; | |
bkp_site = info['site'] | |
bkp_date = datetime.strptime(info['date'], "%Y-%m-%dT%H-%M-%S") | |
date_cal = bkp_date.isocalendar() | |
year_sel ='Y' + str(date_cal[0]) | |
week_sel ='W' + str(date_cal[1]) | |
# Skip the last 7 days untouched. | |
if bkp_date > date_7days_ago: | |
# print "Keep ", bkp_type, " from ", bkp_date, " for: ", bkp_site | |
continue | |
# Simple Logic for FILE backups | |
if bkp_type == 'files': | |
# Default value, take the first available | |
if bkp_site not in file_bkps: | |
file_bkps[bkp_site] = {'info': info, 'date': bkp_date, 'bkp': backup} | |
isOlderThanFourWeeks = bkp_date < date_4weeks_ago | |
isNewerThanSelected = bkp_date > file_bkps[bkp_site]['date'] | |
if isNewerThanSelected: | |
# Remove current selected if it is not the newest and it's from more than 4 weeks ago | |
if file_bkps[bkp_site]['date'] < date_4weeks_ago: | |
bkps_in_excess.append(file_bkps[bkp_site]) | |
file_bkps[bkp_site] = {'info': info, 'date': bkp_date, 'bkp': backup} | |
elif isOlderThanFourWeeks: | |
# print "Select to remove FILES backups older than 4 weeks. Site: ", bkp_site, ' Date: ', bkp_date | |
bkps_in_excess.append({'info': info, 'date': bkp_date, 'bkp': backup}) | |
elif bkp_type == 'db': | |
# initialize SITE map | |
if bkp_site not in db_bkps: | |
db_bkps[bkp_site] = {year_sel: {}} | |
# initialize SITE][YEAR map | |
if year_sel not in db_bkps[bkp_site]: | |
db_bkps[bkp_site][year_sel] = {} | |
if week_sel not in db_bkps[bkp_site][year_sel]: | |
db_bkps[bkp_site][year_sel][week_sel] = {'info': info, 'date': bkp_date, 'year': year_sel, 'week': week_sel, 'bkp': backup} | |
elif db_bkps[bkp_site][year_sel][week_sel]['date'] < bkp_date: | |
# Mark this as extra backup to remove | |
bkps_in_excess.append(db_bkps[bkp_site][year_sel][week_sel]) | |
# as we only keep the most recent backup per week | |
db_bkps[bkp_site][year_sel][week_sel] = {'info': info, 'date': bkp_date, 'year': year_sel, 'week': week_sel, 'bkp': backup} | |
else: | |
# Only keep 1 (latest) backup per week | |
bkps_in_excess.append({'info': info, 'year': year_sel, 'week': week_sel, 'bkp': backup}) | |
else: | |
print 'Unknown backup type: ', bkp_type | |
else: | |
print 'No matching groups:', line | |
except IndexError: | |
print 'Skip line: ', line | |
print "\n##### 3. Remove extra backups...\n" | |
cur_instance = None | |
cur_type = None | |
chosen_bkp_date = None | |
for bkp_data in bkps_in_excess: | |
if (cur_instance is None or cur_instance != bkp_data['info']['site']): | |
cur_type = None | |
cur_instance = bkp_data['info']['site'] | |
print "\n# Instance:", cur_instance | |
if (cur_type is None or cur_type != bkp_data['info']['bktype']): | |
cur_type = bkp_data['info']['bktype'] | |
print '## Type: ', cur_type | |
if cur_type == 'files': | |
print '#' | |
print '+ Most recent: ', file_bkps[cur_instance]['date'] | |
# print '+ Keeping: ', file_bkps[cur_instance]['date'] | |
bkp_name = bkp_data['bkp'][3] | |
if cur_type == 'db': | |
_bkp_date_selection = db_bkps[ cur_instance ][ bkp_data['year'] ][ bkp_data['week'] ]['date'] | |
if (chosen_bkp_date is None or chosen_bkp_date != _bkp_date_selection): | |
chosen_bkp_date = _bkp_date_selection | |
print '#' | |
print '+ Keeping weekly: ', chosen_bkp_date | |
print '- Remove: ', bkp_data['info']['date'] | |
# Remove from S3 | |
proc = subprocess.Popen(['/usr/local/bin/s3cmd', 'del', bkp_name], stdout=subprocess.PIPE) | |
(out, err) = proc.communicate() | |
# print out | |
if (err is not None): | |
print "Error encountered:" | |
print err | |
print "##### X. ...done!\n" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Rev 2: Also allow digits in the site-name matching.