Last active
May 3, 2019 10:37
-
-
Save AdrienHorgnies/2da12373a13804c742f1dd85caff9ea6 to your computer and use it in GitHub Desktop.
Get rid of directories that are not referenced by any symbolic link, because they are useless in my use case.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import logging | |
import os | |
import re | |
from datetime import datetime | |
from subprocess import call | |
from tempfile import mkdtemp | |
# Directory that contains the snapshot directories and the 'current' directory.
republication_path = '/data/activity_resources/republication'
# Directory whose symbolic links designate the snapshots that are still in use.
current_path = os.path.join(republication_path, 'current')

# Names of the links in current_path that are inspected when looking for used snapshots.
languages = {
    'AR_EG', 'BG_BG', 'CS_CZ', 'DA_DK', 'DE_DE', 'EL_GR', 'EN_GB', 'EN_US',
    'ES_ES', 'ET_EE', 'FI_FI', 'FR_FR', 'GA_IE', 'HR_HR', 'HU_HU', 'ID_ID',
    'IT_IT', 'LT_LT', 'LV_LV', 'MT_MT', 'NB_NO', 'NL_NL', 'PL_PL', 'PT_BR',
    'PT_PT', 'RO_RO', 'RU_RU', 'SK_SK', 'SL_SI', 'SV_SE', 'TR_TR',
}

# Bare message for normal console output; detailed format for debug output and the log file.
info_format = logging.Formatter('%(message)s')
debug_format = logging.Formatter('%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s')
def main():
    """Delete every snapshot that no symbolic link in ``current`` refers to.

    The set of snapshots to remove is the set returned by ``find_snapshots()``
    minus the set returned by ``find_used_snapshots()``. Each removal is
    delegated to ``delete_dir``, which itself honours the ``--do`` flag, so
    without that flag the run only logs what it would do.
    """
    if not args.do:
        log.info('This is a DRY RUN, use option --do to make a real run')

    existing = find_snapshots()
    log.debug('found {} snapshot(s): {}'.format(len(existing), existing))

    referenced = find_used_snapshots()
    log.debug('found {} used snapshot(s): {}'.format(len(referenced), referenced))

    orphaned = existing.difference(referenced)
    log.debug('found {} useless snapshot(s): {}'.format(len(orphaned), orphaned))

    for name in orphaned:
        log.info('deleting {}'.format(name))
        target = os.path.join(republication_path, name)
        delete_dir(target)
def delete_dir(directory):
    """Delete ``directory`` and its content, or only log the command on a dry run.

    The content is purged by running ``rsync -a --delete`` from a freshly
    created empty directory onto ``directory``; the then-empty ``directory``
    is removed with ``os.rmdir``. Nothing is deleted unless ``args.do`` is
    set; the rsync command line is logged at DEBUG level in both cases.

    :param directory: path of the directory to delete.
    :raises OSError: if ``directory`` cannot be removed, e.g. because rsync
        failed and left it non-empty.
    """
    empty_dir = mkdtemp()
    try:
        # rsync against an empty source is used to purge the target; the
        # original author chose it for its efficiency on large trees.
        # os.path.join(empty_dir, '') adds the trailing separator so that rsync
        # syncs the *content* of the empty directory rather than the directory.
        aria = ['rsync', '-a', '--delete', os.path.join(empty_dir, ''), directory]
        log.debug(' '.join(aria))
        if args.do:
            status = call(aria)
            if status != 0:
                # Surface the real cause instead of only the "directory not
                # empty" error that os.rmdir is likely to raise just below.
                log.error('rsync exited with status {} while emptying {}'.format(status, directory))
            os.rmdir(directory)
    finally:
        # Always remove the temporary directory, even when the deletion failed.
        os.rmdir(empty_dir)
def find_snapshots(root=None):
    """Return the names of the snapshot directories located directly in ``root``.

    A snapshot is a directory whose name consists of exactly 12 digits.

    :param root: directory to scan; defaults to ``republication_path``.
    :return: set of directory names (not full paths).
    :raises OSError: if ``root`` cannot be listed.
    """
    if root is None:
        root = republication_path
    return {
        name
        for name in os.listdir(root)
        # fullmatch: unlike match() with '$', a trailing newline is rejected.
        if re.fullmatch(r'[0-9]{12}', name)
        # The entry must be resolved against root: a bare name would be
        # looked up in the current working directory instead.
        and os.path.isdir(os.path.join(root, name))
    }
def find_used_snapshots(directory=None, known_languages=None):
    """Return the names of the snapshots referenced by the language links.

    Only entries of ``directory`` that are symbolic links and whose name is in
    ``known_languages`` are considered. For each of them, the snapshot name is
    the name of the parent directory of the link target (the target itself is
    read with ``os.readlink`` and is not required to exist).

    :param directory: directory holding the links; defaults to ``current_path``.
    :param known_languages: link names to inspect; defaults to ``languages``.
    :return: set of snapshot directory names.
    :raises OSError: if ``directory`` cannot be listed or a link cannot be read.
    """
    if directory is None:
        directory = current_path
    if known_languages is None:
        known_languages = languages

    used = set()
    for name in os.listdir(directory):
        link = os.path.join(directory, name)
        if name not in known_languages or not os.path.islink(link):
            continue
        # Normalise first: with a trailing slash ('../snap/FR_FR/'), dirname()
        # would return the language directory and the snapshot would wrongly
        # be reported as unused -- and then deleted.
        target = os.path.normpath(os.readlink(link))
        used.add(os.path.basename(os.path.dirname(target)))
    return used
def configure_logging():
    """Build and return the 'clear_snapshots' logger.

    The logger always gets a stream handler: DEBUG level with the detailed
    format when ``args.debug`` is set, INFO level with the bare-message format
    otherwise. When ``args.do`` is set, a file handler writing to
    'clear_snapshots.log' at DEBUG level with the detailed format is added too.
    """
    logger = logging.getLogger('clear_snapshots')
    logger.setLevel(logging.DEBUG)

    if args.debug:
        console_level, console_format = logging.DEBUG, debug_format
    else:
        console_level, console_format = logging.INFO, info_format

    console = logging.StreamHandler()
    console.setLevel(console_level)
    console.setFormatter(console_format)
    logger.addHandler(console)

    # Dry runs leave no trace on disk: only real runs get the file handler.
    if not args.do:
        return logger

    logfile = logging.FileHandler('clear_snapshots.log')
    logfile.setLevel(logging.DEBUG)
    logfile.setFormatter(debug_format)
    logger.addHandler(logfile)
    return logger
if __name__ == '__main__':
    # Command-line interface: both options are plain switches that are off by default.
    parser = argparse.ArgumentParser(
        description='Delete snapshots not referenced by any symbolic link in current',
    )
    parser.add_argument(
        '--do',
        action='store_true',
        default=False,
        help='By default, this script makes a dry run, this option makes it active. It will modify filesystem with this option',
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        default=False,
        help='Set logger level to DEBUG for stdout handler',
    )

    # args and log are module-level names read by the functions of this script.
    args = parser.parse_args()
    log = configure_logging()

    log.debug('starting at {}'.format(datetime.now().isoformat()))
    main()
    log.debug('completed at {}'.format(datetime.now().isoformat()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment