Skip to content

Instantly share code, notes, and snippets.

@Amunak
Last active April 3, 2020 18:22
Show Gist options
  • Save Amunak/7e2a5f3b1e344287883963689ddc86ef to your computer and use it in GitHub Desktop.
Save Amunak/7e2a5f3b1e344287883963689ddc86ef to your computer and use it in GitHub Desktop.
A simple backup-pruning Python script
#!/usr/bin/env python3
from math import ceil
from pathlib import Path
from datetime import datetime
"""
A simple backup-pruning Python script
created by Amunak;
feel free to re-use and modify
Note that it uses a single pass for selecting the files and very simple logic to do so.
The kept files are CUMULATIVE: if you ask for the last 20 files and the last 4 weeks,
you will always retain the last 20 files and then 4 weeks ON TOP of that (same with months). This often leads to
multiple files per week/month being kept one after another, which is a limitation of the script (though not a big one IMO).
Additionally one file per year is kept.
Note that the files are simply sorted latest to oldest and the logic is applied from there, which means that for the past X
months you are guaranteed to get at least one file per month, but unlike usual pruning solutions you will get the LAST file
in that month as opposed to the first one. It might seem a bit ugly, but it keeps this script simple and doesn't matter in the end.
"""
# --- Configuration -----------------------------------------------------------
# Arm the script? (True deletes files, False is a dry run that only prints
# what would be kept and what would be removed)
arm = False
# The target directory that is scanned for backup files (not recursive:
# the glob pattern below is applied directly inside this directory)
target_dir = '/var/backup/mysql'
# Glob pattern for matching files (so that you select only what you want)
glob_pattern = 'mysql-*.sql'
# Pattern for extracting date out of the filenames (gets passed to datetime.strptime);
# files matching the glob whose name does not parse with it are skipped
date_pattern = 'mysql-%Y-%m-%d.sql'
# How many of each files to keep:
#   keep_last   - the newest N files are always kept
#   keep_weeks  - on top of that, one file for each of N further weeks
#   keep_months - on top of that, one file for each of N further months
keep_last = 10
keep_weeks = 4
keep_months = 6
def week_of_month(dt):
    """Return the 1-based week-of-month number of *dt*.

    Weeks start on Monday: the day of month is shifted by the weekday of
    the 1st (Monday == 0), then divided by 7 and rounded up. Depending on
    where the 1st falls, a month spans up to 6 such weeks.
    """
    # Days of the first (possibly partial) week that precede the 1st.
    leading_days = dt.replace(day=1).weekday()
    shifted_day = dt.day + leading_days
    # Integer ceiling division: ceil(shifted_day / 7) for positive ints.
    return (shifted_day + 6) // 7
def get_files(target):
    """Return the regular files in *target* matching ``glob_pattern``.

    The result is a list of ``Path`` objects sorted in descending path
    order, which for date-stamped names such as ``mysql-YYYY-MM-DD.sql``
    means newest first.
    """
    candidates = Path(target).glob(glob_pattern)
    # Directories (or anything else that is not a regular file) are ignored.
    return sorted(
        (entry for entry in candidates if entry.is_file()),
        reverse=True,
    )
def __main__():
    """Decide which backup files to prune and, when armed, delete them.

    Files returned by ``get_files(target_dir)`` are walked in the order
    given (newest first for date-stamped names). Each file whose name
    parses with ``date_pattern`` is kept if it is, in this priority order:

    1. one of the first ``keep_last`` files,
    2. the first file seen of a new week, while the week budget lasts,
    3. the first file seen of a new month, while the month budget lasts,
    4. the first file seen of a new year (unlimited).

    Everything else is scheduled for removal. Files whose name does not
    parse are reported and left untouched. A line describing the decision
    is printed for every file. Files are only deleted when ``arm`` is
    true; otherwise this is a dry run.

    Each tracker (week/month/year) is only updated when its own rule keeps
    a file, so the budgets are cumulative rather than overlapping.
    """
    # Identity of the last period each rule kept a file for. A week is
    # identified by (year, month, week-of-month) and a month by
    # (year, month): comparing the bare week number (1-6) or month number
    # (1-12) would treat e.g. week 1 of April and week 1 of March, or
    # April 2020 and April 2019, as the same period when backups are sparse.
    last_week = None
    weeks = keep_weeks
    last_month = None
    months = keep_months
    last_year = None
    count = 0
    files_to_remove = []

    for file in get_files(target_dir):
        try:
            date = datetime.strptime(file.name, date_pattern)
        except ValueError:
            print(f'Failed reading date from "{file.name}", skipping')
            continue

        count += 1
        print(f'File #{count} "{file.name}": ', end='')

        week = week_of_month(date)
        week_key = (date.year, date.month, week)
        month_key = (date.year, date.month)

        remove = True
        if count <= keep_last:
            print(f'{count} out of last {keep_last} that are always kept')
            remove = False
        elif week_key != last_week and weeks > 0:
            last_week = week_key
            print(f'First of week {week} (keeping week {weeks} out of {keep_weeks})')
            weeks -= 1
            remove = False
        elif month_key != last_month and months > 0:
            last_month = month_key
            print(f'First of month {date.month} (keeping month {months} out of {keep_months})')
            months -= 1
            remove = False
        elif last_year != date.year:
            last_year = date.year
            print(f'First of year {date.year}')
            remove = False

        if remove:
            print('REMOVING')
            files_to_remove.append(file)

    # Deletion happens only after every file has been classified, and only
    # when the script is armed.
    if arm:
        for file in files_to_remove:
            file.unlink()
# Run the pruning only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment