Skip to content

Instantly share code, notes, and snippets.

@domarps
Created October 19, 2018 16:45
Show Gist options
  • Save domarps/f4532f7e5d22efc74b3cca87d769ee11 to your computer and use it in GitHub Desktop.
Save domarps/f4532f7e5d22efc74b3cca87d769ee11 to your computer and use it in GitHub Desktop.
# Copy files from one directory to another directory in the same S3 bucket.
#
# Step 1: work out which objects still need copying -- every entry listed in
# all_files.log that is not already listed in processed.log.


def _read_entries(log_path):
    """Return the set of non-empty lines in *log_path*, newlines stripped."""
    # The context manager closes the handle as soon as the file is read
    # instead of leaving it open until garbage collection.
    with open(log_path) as log_file:
        entries = {line.rstrip('\n') for line in log_file}
    # A blank line is not an object key; drop it so it is never scheduled.
    entries.discard('')
    return entries


all_tsvs = _read_entries('all_files.log')
processed_tsvs = _read_entries('processed.log')

# Keep whatever follows the 's3://psriniva/' prefix (entries without that
# prefix pass through unchanged). Sorting gives a reproducible processing
# order, since set iteration order is arbitrary. The list may legitimately be
# empty when everything has already been processed.
to_do_tsvs = sorted(
    elem.split('s3://psriniva/')[-1] for elem in all_tsvs - processed_tsvs
)
import boto3
# Module-level boto3 S3 service resource; copy_to_bucket() reads this global.
s3_resource = boto3.resource('s3')

# Sample bucket/key pair. Only `key` is used here (echoed to stdout); neither
# name is referenced again in the visible part of the script.
source_bucket, key = 'psriniva', 'es-snapshot-130M-tsv/part-03338'
print(key)
def copy_to_bucket(bucket_name: str, src_key_name: str, dst_key_name: str) -> None:
    """Copy one object to a new key inside the same S3 bucket.

    Args:
        bucket_name: Bucket that holds the source object and also receives
            the copy.
        src_key_name: Key of the existing object to copy.
        dst_key_name: Key under which the copy is written.

    Relies on the module-level ``s3_resource`` (a boto3 S3 service resource).
    Any exception raised by boto3 propagates to the caller.
    """
    copy_source = {
        'Bucket': bucket_name,
        'Key': src_key_name,
    }
    # Source and destination share bucket_name, so this is an in-bucket copy
    # performed through boto3's Object.copy() on the destination object.
    s3_resource.Object(bucket_name, dst_key_name).copy(copy_source)
# Step 2: copy every pending object into the 'rem-tsv/' prefix of the same
# bucket, naming the copy after the last path component of the source key.
# NOTE: two source keys that share a final component map to the same
# destination key, so the later copy overwrites the earlier one.
bucket_name = 'psriniva'
for src_key_name in to_do_tsvs:
    dst_key_name = 'rem-tsv/' + src_key_name.split('/')[-1]
    copy_to_bucket(bucket_name, src_key_name, dst_key_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment