Skip to content

Instantly share code, notes, and snippets.

@AdrienHorgnies
Last active May 3, 2019 10:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AdrienHorgnies/2da12373a13804c742f1dd85caff9ea6 to your computer and use it in GitHub Desktop.
Save AdrienHorgnies/2da12373a13804c742f1dd85caff9ea6 to your computer and use it in GitHub Desktop.
Get rid of directories that are not referenced by a symbolic link, because they are useless in my use case.
#!/usr/bin/env python3
import argparse
import logging
import os
import re
from datetime import datetime
from subprocess import call
from tempfile import mkdtemp
# Root directory that is scanned for snapshot directories.
republication_path = '/data/activity_resources/republication'

# Directory whose symbolic links are inspected to find the snapshots in use.
current_path = os.path.join(republication_path, 'current')

# Only links in ``current_path`` bearing one of these names are considered.
languages = {
    'AR_EG', 'BG_BG', 'CS_CZ', 'DA_DK', 'DE_DE', 'EL_GR', 'EN_GB', 'EN_US',
    'ES_ES', 'ET_EE', 'FI_FI', 'FR_FR', 'GA_IE', 'HR_HR', 'HU_HU', 'ID_ID',
    'IT_IT', 'LT_LT', 'LV_LV', 'MT_MT', 'NL_NL', 'PT_BR', 'PT_PT', 'PL_PL',
    'RO_RO', 'RU_RU', 'SK_SK', 'SL_SI', 'SV_SE', 'TR_TR', 'NB_NO',
}

# Terse format used on the console unless --debug is given.
info_format = logging.Formatter('%(message)s')
# Detailed format used for the log file and for the console with --debug.
debug_format = logging.Formatter(
    '%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s'
)
def main():
    """Delete every snapshot that no link in ``current`` refers to.

    Lists all snapshots, lists the ones in use, and hands each snapshot of the
    difference to ``delete_dir``. Whether anything is really deleted depends
    on ``args.do`` (checked in ``delete_dir``); without it only a notice and
    the planned deletions are logged.
    """
    dry_run = not args.do
    if dry_run:
        log.info('This is a DRY RUN, use option --do to make a real run')

    all_snaps = find_snapshots()
    log.debug('found {} snapshot(s): {}'.format(len(all_snaps), all_snaps))

    referenced = find_used_snapshots()
    log.debug('found {} used snapshot(s): {}'.format(len(referenced), referenced))

    orphans = all_snaps - referenced
    log.debug('found {} useless snapshot(s): {}'.format(len(orphans), orphans))

    for name in orphans:
        log.info('deleting {}'.format(name))
        target = os.path.join(republication_path, name)
        delete_dir(target)
def delete_dir(directory):
    """Delete ``directory`` together with everything it contains.

    The content is removed by running ``rsync -a --delete`` from an empty
    temporary directory onto ``directory``; the emptied directory is then
    removed with ``os.rmdir``. When ``args.do`` is false (dry run) the rsync
    command is only logged and nothing is deleted.

    Args:
        directory: path of the directory to delete.

    Raises:
        OSError: if rsync cannot be started or ``directory`` cannot be
            removed (for instance because rsync failed to empty it).
    """
    empty_dir = mkdtemp()
    try:
        # use rsync on empty directory because it is very efficient, it guarantees it deletes in the correct order
        # os.path.join(empty_dir, '') appends the trailing separator so that
        # rsync syncs the *content* of the empty directory.
        aria = ['rsync', '-a', '--delete', os.path.join(empty_dir, ''), directory]
        log.debug(' '.join(aria))
        if args.do:
            status = call(aria)
            if status != 0:
                # Do not stay silent about a failed rsync; os.rmdir below
                # still raises if the directory could not be emptied.
                log.error('rsync exited with status {} while emptying {}'.format(status, directory))
            os.rmdir(directory)
    finally:
        # Always remove the temporary directory, even when rsync or rmdir
        # fails, so that failed runs do not leave stray directories behind.
        os.rmdir(empty_dir)
def find_snapshots():
    """Return the names of the snapshot directories in ``republication_path``.

    A snapshot is a directory whose name consists of exactly twelve digits.

    Returns:
        set of str: bare directory names (not full paths).
    """
    snapshots = set()
    for name in os.listdir(republication_path):
        # fullmatch rejects names with a trailing newline, which '$' accepts.
        if not re.fullmatch(r'[0-9]{12}', name):
            continue
        # os.listdir yields bare names: resolve them against
        # republication_path, otherwise isdir() would look them up in the
        # current working directory.
        if os.path.isdir(os.path.join(republication_path, name)):
            snapshots.add(name)
    return snapshots
def find_used_snapshots():
    """Return the names of the snapshots referenced from ``current_path``.

    Only entries of ``current_path`` that are symbolic links and whose name
    is one of ``languages`` are considered. For each of them the snapshot
    name is the name of the parent directory of the link target.

    Returns:
        set of str: snapshot directory names.
    """
    used = set()
    for name in os.listdir(current_path):
        if name not in languages:
            continue
        link = os.path.join(current_path, name)
        if not os.path.islink(link):
            continue
        # A target written with a trailing separator ('../<snap>/FR_FR/')
        # would make dirname() return the language directory itself, the
        # snapshot would be reported as unused and then deleted.
        target = os.readlink(link).rstrip(os.sep)
        used.add(os.path.basename(os.path.dirname(target)))
    return used
def configure_logging():
    """Create and return the ``clear_snapshots`` logger.

    A stream handler is always attached: DEBUG level with the detailed format
    when ``args.debug`` is set, INFO level with the terse format otherwise.
    When ``args.do`` is set, a DEBUG-level file handler writing to
    ``clear_snapshots.log`` is attached as well.

    Returns:
        logging.Logger: the configured logger.
    """
    logger = logging.getLogger('clear_snapshots')
    # The logger itself lets everything through; handlers do the filtering.
    logger.setLevel(logging.DEBUG)

    console = logging.StreamHandler()
    if args.debug:
        console.setLevel(logging.DEBUG)
        console.setFormatter(debug_format)
    else:
        console.setLevel(logging.INFO)
        console.setFormatter(info_format)
    logger.addHandler(console)

    if not args.do:
        return logger

    # Real runs additionally keep a detailed trace in a file.
    logfile = logging.FileHandler('clear_snapshots.log')
    logfile.setLevel(logging.DEBUG)
    logfile.setFormatter(debug_format)
    logger.addHandler(logfile)
    return logger
if __name__ == '__main__':
    # Script entry point: parse the options, set up logging, run the cleanup.
    parser = argparse.ArgumentParser(
        description='Delete snapshots not referenced by any symbolic link in current',
    )
    # store_true options already default to False.
    parser.add_argument(
        '--do',
        action='store_true',
        help='By default, this script makes a dry run, this option makes it active. It will modify filesystem with this option',
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Set logger level to DEBUG for stdout handler',
    )

    # ``args`` and ``log`` are module globals read by the functions above.
    args = parser.parse_args()
    log = configure_logging()

    started_at = datetime.now().isoformat()
    log.debug('starting at {}'.format(started_at))
    main()
    finished_at = datetime.now().isoformat()
    log.debug('completed at {}'.format(finished_at))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment