Skip to content

Instantly share code, notes, and snippets.

@Niklas9
Last active October 1, 2018 21:37
Show Gist options
  • Save Niklas9/42f150b6c643c7650e450aaabb302ad6 to your computer and use it in GitHub Desktop.
Save Niklas9/42f150b6c643c7650e450aaabb302ad6 to your computer and use it in GitHub Desktop.
sync diff items in AWS S3 buckets
#!/bin/python
import os
import sys
# Usage check: exactly two positional arguments are required (the source and
# the target listing files). It is enforced only when the file is executed as
# a script, so importing the module neither prints nor exits.
if __name__ == '__main__' and len(sys.argv) != 3:
    print('need two input args, source and target files')
    # Stop here with a failure status; carrying on would only fail later
    # when sys.argv[1] / sys.argv[2] are read.
    sys.exit(1)
def parse_aws_s3_summary_file(fp):
    """Collect the ``.bz2`` entries named in a listing file.

    Every row is split on single spaces and only its last field is looked
    at. A field is kept when it contains ``.bz2``; newline characters and
    every occurrence of ``XXX/`` are removed from it before it is stored.

    Args:
        fp: Path of the text file to read.

    Returns:
        A set with the cleaned-up entries (duplicates collapse).

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(review): assumes entries that do not contain 'XXX/' are still
    # collected (unchanged) -- confirm against the intended behaviour.
    refs = set()
    with open(fp, 'r') as listing:
        for row in listing:
            last_field = row.split(' ')[-1]
            if '.bz2' not in last_field:
                continue
            # Removing 'XXX/' is a no-op when the marker is absent, so no
            # separate membership test is needed.
            refs.add(last_field.replace('\n', '').replace('XXX/', ''))
    return refs
if __name__ == '__main__':
    # Script entry point: compare two listing files and print one
    # `aws s3 cp` command per entry found in the source but not the target.

    # Local import: only the command rendering below needs it.
    import shlex

    fp_source = sys.argv[1]
    fp_target = sys.argv[2]
    set1_source = parse_aws_s3_summary_file(fp_source)
    set2_target = parse_aws_s3_summary_file(fp_target)
    # Entries present in the source listing but missing from the target.
    # Sorted so that repeated runs emit the commands in the same order.
    diff_list = sorted(set1_source - set2_target)
    print('source contained: {0:d}'.format(len(set1_source)))
    print('target contained: {0:d}'.format(len(set2_target)))
    print('identified diff: {0:d}'.format(len(diff_list)))
    # clear memory for unneeded items
    set1_source = None
    set2_target = None
    # Give the operator a chance to review the counts before any command
    # is printed; ctrl-c aborts with a failure status.
    try:
        input('\nPress enter to continue or ctrl-c to exit')
    except KeyboardInterrupt:
        sys.exit(1)
    for fp in diff_list:
        # Quote both URLs so a name containing shell metacharacters cannot
        # alter the command; plain names are rendered exactly as before.
        cmd = 'aws s3 cp {0} {1}'.format(
            shlex.quote('s3://XXX/' + fp),
            shlex.quote('s3://YYY/' + fp),
        )
        print(cmd)
        # Execution is left disabled, so the script only prints the commands.
        #os.system(cmd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment