Last active
October 1, 2018 21:37
-
-
Save Niklas9/42f150b6c643c7650e450aaabb302ad6 to your computer and use it in GitHub Desktop.
sync diff items in AWS S3 buckets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python
import os
import shlex
import sys
# Validate the command line only when executed as a script, so that merely
# importing this module neither prints nor exits.
if __name__ == '__main__' and len(sys.argv) != 3:
    print('need two input args, source and target files')
    # Stop here: without both paths the sys.argv[1] / sys.argv[2] lookups
    # further down would fail with an IndexError.
    sys.exit(1)
def parse_aws_s3_summary_file(fp, marker='.bz2', strip_prefix='XXX/'):
    """Collect the file references named in a listing file.

    The function name suggests the input is saved ``aws s3`` listing output;
    the format itself is not validated. Each row is split on single spaces
    and only the last field is examined, so a reference that contains a
    space is seen only from its final space onward.

    Args:
        fp: Path of the text file to read.
        marker: Substring the last field of a row must contain for the row
            to be kept. Defaults to '.bz2'.
        strip_prefix: Text removed from each kept reference (every
            occurrence, not only a leading one). Pass '' to keep the
            references unchanged. Defaults to 'XXX/'.

    Returns:
        A set of the kept references with newlines and ``strip_prefix``
        removed.
    """
    refs = set()
    with open(fp, 'r') as listing:
        for row in listing:
            last_field = row.split(' ')[-1]
            if marker not in last_field:
                # Blank rows, summary rows and other file types end up here.
                continue
            file_ref = last_field.replace('\n', '')
            if strip_prefix:
                file_ref = file_ref.replace(strip_prefix, '')
            refs.add(file_ref)
    return refs
# Script entry point: report which references appear in the source listing but
# not in the target listing, then print one `aws s3 cp` command per missing
# reference.
if __name__ == '__main__':
    fp_source = sys.argv[1]
    fp_target = sys.argv[2]
    set1_source = parse_aws_s3_summary_file(fp_source)
    set2_target = parse_aws_s3_summary_file(fp_target)
    # Sort the difference so the commands are printed in the same order on
    # every run; iteration order of a set of strings is not stable between
    # interpreter runs.
    diff_list = sorted(set1_source - set2_target)
    print('source contained: {0:d}'.format(len(set1_source)))
    print('target contained: {0:d}'.format(len(set2_target)))
    print('identified diff: {0:d}'.format(len(diff_list)))
    # clear memory for unneeded items
    del set1_source, set2_target
    try:
        input('\nPress enter to continue or ctrl-c to exit')
    except KeyboardInterrupt:
        sys.exit(1)
    for fp in diff_list:
        # Quote both URIs so a reference containing shell metacharacters
        # cannot alter the command; ordinary references are left unchanged.
        cmd = 'aws s3 cp {0} {1}'.format(
            shlex.quote('s3://XXX/' + fp),
            shlex.quote('s3://YYY/' + fp),
        )
        print(cmd)
        # Dry run by default: uncomment the next line to execute the copies.
        #os.system(cmd)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment