Last active
April 3, 2020 18:22
-
-
Save Amunak/7e2a5f3b1e344287883963689ddc86ef to your computer and use it in GitHub Desktop.
A simple backup-pruning Python script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from math import ceil | |
from pathlib import Path | |
from datetime import datetime | |
""" | |
A simple backup-pruning Python script | |
created by Amunak; | |
feel free to re-use and modify | |
Note that the script selects files in a single pass, using very simple logic.
The retention rules are CUMULATIVE: if you ask for the last 20 files and the last 4 weeks,
you will always retain the last 20 files and then 4 weeks ON TOP of that (the same goes for months). This often leaves
several files per week/month one after another, which is a limitation of the script (though not a big one IMO).
Additionally, one file per year is kept.
Note that the files are simply sorted from latest to oldest and the logic is applied in that order, which means that for the past X
months you are guaranteed to get at least one file per month, but unlike the usual pruning solutions you will get the LAST file
in that month rather than the first one. It might seem a bit ugly, but it keeps this script simple and does not matter in the end.
""" | |
# Safety switch: True actually deletes the selected files, False is a dry run
# that only prints what would be removed.
arm: bool = False

# Directory that is scanned for backup files.
target_dir: str = '/var/backup/mysql'

# Glob pattern deciding which files in the target directory are considered.
glob_pattern: str = 'mysql-*.sql'

# Format used to read the backup date from each file name
# (handed to datetime.strptime together with the file name).
date_pattern: str = 'mysql-%Y-%m-%d.sql'

# Retention budgets: the newest files that are always kept, followed by the
# number of additional weekly and monthly files kept on top of them.
keep_last: int = 10
keep_weeks: int = 4
keep_months: int = 6
def week_of_month(dt):
    """Return the 1-based week-of-month number of *dt*.

    The day of the month is shifted by the weekday of the 1st of that month
    (Monday == 0), so the count advances every Monday and the first, possibly
    partial, week is week 1.
    """
    leading_days = dt.replace(day=1).weekday()
    return int(ceil((dt.day + leading_days) / 7.0))
def get_files(target):
    """Return the regular files in *target* that match ``glob_pattern``.

    The result is a list of ``Path`` objects sorted in descending path order;
    entries that are not regular files are left out.
    """
    matches = Path(target).glob(glob_pattern)
    return sorted((entry for entry in matches if entry.is_file()), reverse=True)
def __main__():
    """Walk the backups in ``get_files`` order and prune those not retained.

    Every file whose name parses with ``date_pattern`` is checked against the
    retention rules in this order; the first rule that matches keeps the file:

    1. it is among the first ``keep_last`` parsed files;
    2. it falls in a different calendar week than the previous file kept by
       this rule, while weekly slots (``keep_weeks``) remain;
    3. it falls in a different calendar month than the previous file kept by
       this rule, while monthly slots (``keep_months``) remain;
    4. it falls in a different year than the previous file kept by this rule.

    Files matching no rule are reported as ``REMOVING`` and are deleted only
    when ``arm`` is true. Files whose name does not parse are reported and
    skipped; they are never deleted.
    """
    # Periods of the most recent file kept by each rule. Weeks and months are
    # tracked together with their year (and month): comparing the bare
    # week-of-month or month number would treat e.g. week 1 of April and
    # week 1 of March as the same week when no kept file lies in between.
    last_week = None    # (year, month, week-of-month)
    last_month = None   # (year, month)
    last_year = None
    weeks_left = keep_weeks
    months_left = keep_months
    count = 0
    files_to_remove = []

    for file in get_files(target_dir):
        try:
            date = datetime.strptime(file.name, date_pattern)
        except ValueError:
            print(f'Failed reading date from "{file.name}", skipping')
            continue

        count += 1
        print(f'File #{count} "{file.name}": ', end='')

        week = week_of_month(date)
        week_key = (date.year, date.month, week)
        month_key = (date.year, date.month)

        if count <= keep_last:
            print(f'{count} out of last {keep_last} that are always kept')
        elif weeks_left > 0 and week_key != last_week:
            last_week = week_key
            print(f'First of week {week} (keeping week {weeks_left} out of {keep_weeks})')
            weeks_left -= 1
        elif months_left > 0 and month_key != last_month:
            last_month = month_key
            print(f'First of month {date.month} (keeping month {months_left} out of {keep_months})')
            months_left -= 1
        elif date.year != last_year:
            last_year = date.year
            print(f'First of year {date.year}')
        else:
            print('REMOVING')
            files_to_remove.append(file)

    # Deletion happens only after the whole listing has been classified, and
    # only when the script is armed; otherwise this is a dry run.
    if arm:
        for file in files_to_remove:
            file.unlink()
# Run the pruning only when this file is executed as a script.
if __name__ == '__main__':
    __main__()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment