Skip to content

Instantly share code, notes, and snippets.

@aSipiere
Created October 28, 2019 10:28
Show Gist options
  • Save aSipiere/99da73145ef9c9201418582f5bd15a59 to your computer and use it in GitHub Desktop.
Save aSipiere/99da73145ef9c9201418582f5bd15a59 to your computer and use it in GitHub Desktop.
Bulk renaming extensions in S3
import boto3
import botocore
from joblib import Parallel, delayed
import os
# Name of the bucket to operate on (placeholder value; set before running).
bucket_name = "bucket_name"
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

# Probe the bucket once up front; `exists` records whether it was found.
exists = True
try:
    s3.meta.client.head_bucket(Bucket=bucket_name)
except botocore.exceptions.ClientError as e:
    # If a client error is thrown, then check that it was a 404 error.
    # If it was a 404 error, then the bucket does not exist.
    # Any other client error code is ignored here and leaves `exists` True.
    error_code = e.response['Error']['Code']
    if error_code == '404':
        exists = False

# Key prefix to list objects under, and key prefix the copies are written to.
source = "source_prefix"
target = "target_prefix"
def copier(key, bucket_name, source, target):
    """Copy one S3 object under ``target`` with its extension replaced by ``.gz``.

    The destination key is ``"<target>/<basename-without-extension>.gz"``,
    where the basename is the part of ``key`` after its last ``/``. The copy
    happens within ``bucket_name``; the source object is left in place.

    Args:
        key: Full key of the object to copy.
        bucket_name: Name of the bucket holding both source and destination.
        source: Source prefix. Kept for interface compatibility; the copy
            source is taken from ``key`` itself, which already contains it.
        target: Prefix under which the copy is written.

    Raises:
        botocore.exceptions.ClientError: If the copy request fails.
    """
    source_filename = key.split('/')[-1]
    if not source_filename:
        # Keys ending in "/" (folder placeholders) have no file name; copying
        # them would only create a bogus "<target>/.gz" object.
        return

    target_filename = "{}/{}.gz".format(target, os.path.splitext(source_filename)[0])
    print(source_filename, target_filename)
    print('{}/{}'.format(bucket_name, key))

    # A resource is built per call rather than shared with the caller --
    # presumably so the function can run inside joblib workers; confirm.
    s3 = boto3.resource('s3')

    # Use the real key (not "<source>/<filename>") so nested keys resolve, and
    # the dict form of CopySource so special characters need no URL-encoding.
    copy_source = {'Bucket': bucket_name, 'Key': key}
    s3.Object(bucket_name, target_filename).copy_from(CopySource=copy_source)
    # NOTE: only a copy is made. To turn this into a rename, delete the source
    # afterwards, e.g. s3.Object(bucket_name, key).delete().
# Collect every key under the source prefix before dispatching, so the
# listing is finished before any copies are started.
keys = [obj.key for obj in bucket.objects.filter(Prefix=source)]

# Fan the copies out over all available cores (n_jobs=-1). copier() takes the
# bucket name and both prefixes as positional arguments, so pass them along
# with each key.
Parallel(n_jobs=-1)(
    delayed(copier)(key, bucket_name, source, target) for key in keys
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment