@sigma23
Created January 25, 2018 18:31
Rename S3 files in bulk using boto3
# matching functions from https://alexwlchan.net/2017/07/listing-s3-keys/
# https://alexwlchan.net/2018/01/listing-s3-keys-redux/
import boto3


def get_matching_s3_objects(bucket, prefix='', suffix=''):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client('s3')
    kwargs = {'Bucket': bucket}

    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)

        try:
            contents = resp['Contents']
        except KeyError:
            return

        for obj in contents:
            key = obj['Key']
            if key.startswith(prefix) and key.endswith(suffix):
                yield obj

        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        try:
            kwargs['ContinuationToken'] = resp['NextContinuationToken']
        except KeyError:
            break


def get_matching_s3_keys(bucket, prefix='', suffix=''):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj['Key']


bucket_name = 'my_s3_bucket'
key_prefix = 'blahdir/other_dir/sometext'

# Only get the keys s3://my_s3_bucket/blahdir/other_dir/sometext*.txt.
# Note: this is a lazy generator, not a list.
files = get_matching_s3_keys(bucket=bucket_name,
                             prefix=key_prefix,
                             suffix='.txt')

bucket = 'my_s3_bucket'
new_dir = 'blahdir/other_dir/'  # target "directory" for the renamed keys
dev_client = boto3.client('s3')

for each in files:
    # Copy each .txt key to a .json key in the same directory, for example.
    filename = each.split('/')[-1]
    new_file = new_dir + filename.rsplit('.txt', 1)[0] + '.json'
    print("Copying: ", new_file)
    copy_source = {'Bucket': bucket, 'Key': each}
    dev_client.copy_object(CopySource=copy_source, Bucket=bucket, Key=new_file)
    # dev_client.delete_object(Bucket=bucket, Key=each)  # uncomment to rename (copy + delete) instead of just copy
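
Aside: if you'd rather not do the ContinuationToken bookkeeping by hand, boto3 ships a built-in paginator for list_objects_v2 that follows the token automatically. A minimal sketch of the same listing, reusing the placeholder bucket and prefix from above:

import boto3

s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')

# Each page is one list_objects_v2 response; the paginator chases
# NextContinuationToken for us until the final page.
for page in paginator.paginate(Bucket='my_s3_bucket',
                               Prefix='blahdir/other_dir/sometext'):
    for obj in page.get('Contents', []):
        if obj['Key'].endswith('.txt'):
            print(obj['Key'])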
@dalmosantos

import boto3
import os
from datetime import datetime

prefix = 'abc_'
new_prefix = f"{prefix}{datetime.today().strftime('%Y-%m-%d')}"
suffix = 'csv.gz'
bucket_name = 'mybucketname'


def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.all():
        key = obj.key
        path_part = os.path.dirname(key)
        filename = os.path.basename(key)
        copy_source = {
            'Bucket': bucket_name,
            'Key': key
        }
        if filename.startswith(prefix) and filename.endswith(suffix):
            # Insert today's date right after the prefix,
            # e.g. 'abc_report.csv.gz' -> 'abc_<YYYY-MM-DD>report.csv.gz'
            new_filename = filename.replace(prefix, new_prefix, 1)
            full_key_with_path = os.path.join(path_part, new_filename)
            print(f'copying the object with new key : {full_key_with_path}')
            bucket.copy(copy_source, full_key_with_path)
            print(f'deleting old key : {key}')
            s3.Object(bucket_name, key).delete()
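
One caveat on the handler above: s3.Object(...).delete() issues one request per key, which adds up if a run renames thousands of objects. Deletes can be batched with delete_objects, which accepts up to 1,000 keys per call. A minimal sketch, with a hypothetical keys_to_delete list standing in for the old keys collected during the copy loop:

import boto3

s3_client = boto3.client('s3')

# Hypothetical: old keys gathered while copying, instead of deleting inline.
keys_to_delete = ['some_dir/abc_one.csv.gz', 'some_dir/abc_two.csv.gz']

# delete_objects takes at most 1,000 keys per request, so chunk the list.
for i in range(0, len(keys_to_delete), 1000):
    batch = keys_to_delete[i:i + 1000]
    s3_client.delete_objects(
        Bucket='mybucketname',
        Delete={'Objects': [{'Key': k} for k in batch]},
    )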
