Skip to content

Instantly share code, notes, and snippets.

@AntonOsika
Last active March 15, 2018 14:20
Show Gist options
  • Save AntonOsika/67bf6bccd7aede1a1d4d7d069ebd2cbb to your computer and use it in GitHub Desktop.
Save AntonOsika/67bf6bccd7aede1a1d4d7d069ebd2cbb to your computer and use it in GitHub Desktop.
from __future__ import print_function
import datetime
def excluded_datetimes(start_str):
    """
    Create a regex matching every datetime chronologically before start_str.

    Can be used to exclude files/folders with e.g. rsync file downloads.

    start_str is a timestamp in the form YYYYmmdd_HHMMSS, or a prefix of it
    that ends on a field boundary (YYYY, YYYYmm, YYYYmmdd, YYYYmmdd_HH,
    YYYYmmdd_HHMM). Omitted trailing fields take their earliest value, so
    e.g. '20180315' means 2018-03-15 00:00:00.

    Returns the regex as a string: an alternation of patterns, each wrapped
    in '.*' so it can match the timestamp anywhere inside a path. The start
    datetime itself is not matched.

    If the length of start_str does not correspond to one of the accepted
    forms, a message is printed and None is returned. A string of accepted
    length that is not a valid datetime makes strptime raise ValueError.
    """
    # Input length -> format of the corresponding timestamp prefix. The
    # lookup is by *input* length because '%Y' is two characters in the
    # format but four digits in the input.
    prefix_formats = {
        4: '%Y',
        6: '%Y%m',
        8: '%Y%m%d',
        11: '%Y%m%d_%H',
        13: '%Y%m%d_%H%M',
        15: '%Y%m%d_%H%M%S',
    }
    fmt = prefix_formats.get(len(start_str))
    if fmt is None:
        print("start datetime given in wrong format")
        return
    start_dt = datetime.datetime.strptime(start_str, fmt)

    # Fixed-width prefixes of the start timestamp; formatted by hand so the
    # year is always zero-padded to four digits.
    year_part = '%04d' % start_dt.year
    month_part = year_part + '%02d' % start_dt.month
    day_part = month_part + '%02d_' % start_dt.day
    hour_part = day_part + '%02d' % start_dt.hour
    minute_part = hour_part + '%02d' % start_dt.minute

    # Walk backwards in time, one field at a time; every pattern pins the
    # fields above it to the start value and wildcards the fields below it.
    excluded = []
    # Earlier seconds within the start minute.
    excluded += [minute_part + '%02d' % sec
                 for sec in range(start_dt.second - 1, -1, -1)]
    # Earlier minutes within the start hour.
    excluded += [hour_part + '%02d' % minute + r'\d{2}'
                 for minute in range(start_dt.minute - 1, -1, -1)]
    # Earlier hours within the start day.
    excluded += [day_part + '%02d' % hour + r'\d{4}'
                 for hour in range(start_dt.hour - 1, -1, -1)]
    # Earlier days within the start month.
    excluded += [month_part + '%02d' % day + r'_\d{6}'
                 for day in range(start_dt.day - 1, 0, -1)]
    # Earlier months within the start year (each month listed once).
    excluded += [year_part + '%02d' % month + r'\d{2}_\d{6}'
                 for month in range(start_dt.month - 1, 0, -1)]
    # Every earlier year, whatever century the start year is in.
    excluded += [years + r'\d{4}_\d{6}'
                 for years in reversed(_earlier_year_patterns(start_dt.year))]

    exclude_str = '.*|.*'.join(excluded)
    exclude_str = '.*' + exclude_str + '.*'
    return exclude_str


def _earlier_year_patterns(year):
    """Return regex fragments that together match every 4-digit year < year."""
    digits = '%04d' % year
    patterns = []
    for pos, char in enumerate(digits):
        digit = int(char)
        if digit == 0:
            # No digit is smaller than 0, so nothing earlier shares this prefix.
            continue
        # Same leading digits, a strictly smaller digit here, anything after.
        smaller = '0' if digit == 1 else '[0-%d]' % (digit - 1)
        remaining = len(digits) - pos - 1
        wildcard = r'\d{%d}' % remaining if remaining else ''
        patterns.append(digits[:pos] + smaller + wildcard)
    return patterns
# This file can also be run as a script to sync with GCS via gsutil
# (in which case it should be renamed to e.g. rsync_selection.py):
import argparse
from subprocess import call
if __name__ == '__main__':
    # Command line: <source> <dest> <start> [--exclude REGEX]
    #   source, dest  passed through to `gsutil rsync` unchanged
    #   start         start datetime handed to excluded_datetimes()
    #   --exclude     extra regex OR-ed onto the generated exclusion regex
    parser = argparse.ArgumentParser()
    parser.add_argument('source')
    parser.add_argument('dest')
    parser.add_argument('start')
    parser.add_argument('--exclude', default='')
    flags = parser.parse_args()

    exclude_str = excluded_datetimes(flags.start)
    if exclude_str is None:
        # excluded_datetimes() has already printed why the start value was
        # rejected; never run rsync without a real exclusion pattern.
        raise SystemExit(1)
    if flags.exclude:
        exclude_str += '|' + flags.exclude

    # Build the argv list directly instead of splitting a formatted string,
    # so a source, destination or pattern containing whitespace stays one
    # argument.
    cmd = ['gsutil', '-m', 'rsync', '-r', '-x',
           exclude_str, flags.source, flags.dest]
    print("Runnning rsync command:")
    print(' '.join(cmd))
    call(cmd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment