@sigma23
Created January 25, 2018 18:31
Rename S3 files in bulk using boto3
# matching functions from https://alexwlchan.net/2017/07/listing-s3-keys/
# https://alexwlchan.net/2018/01/listing-s3-keys-redux/
import boto3


def get_matching_s3_objects(bucket, prefix='', suffix=''):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client('s3')
    kwargs = {'Bucket': bucket}

    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)

        try:
            contents = resp['Contents']
        except KeyError:
            return

        for obj in contents:
            key = obj['Key']
            if key.startswith(prefix) and key.endswith(suffix):
                yield obj

        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        try:
            kwargs['ContinuationToken'] = resp['NextContinuationToken']
        except KeyError:
            break


def get_matching_s3_keys(bucket, prefix='', suffix=''):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj['Key']


bucket_name = 'my_s3_bucket'
key_prefix = 'blahdir/other_dir/sometext'

# Only get the keys s3://my_s3_bucket/blahdir/other_dir/sometext*.txt.
# Note: this is a lazy generator, not a list.
files = get_matching_s3_keys(bucket=bucket_name,
                             prefix=key_prefix,
                             suffix='.txt')

bucket = 'my_s3_bucket'
new_dir = 'blahdir/other_dir/'  # target "directory" for the renamed keys
dev_client = boto3.client('s3')

for each in files:
    # Copy each .txt key to a .json key in the same directory, for example.
    filename = each.split('/')[-1]
    new_file = new_dir + filename.rsplit('.txt', 1)[0] + '.json'
    print("Copying: ", new_file)
    copy_source = {'Bucket': bucket, 'Key': each}
    dev_client.copy_object(CopySource=copy_source, Bucket=bucket, Key=new_file)
    # dev_client.delete_object(Bucket=bucket, Key=each)  # uncomment to rename (copy + delete) instead of just copy
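
Aside: if you'd rather not do the ContinuationToken bookkeeping by hand, boto3 ships a built-in paginator for list_objects_v2 that follows the token automatically. A minimal sketch of the same listing, reusing the placeholder bucket and prefix from above:

import boto3

s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')

# Each page is one list_objects_v2 response; the paginator chases
# NextContinuationToken for us until the final page.
for page in paginator.paginate(Bucket='my_s3_bucket',
                               Prefix='blahdir/other_dir/sometext'):
    for obj in page.get('Contents', []):
        if obj['Key'].endswith('.txt'):
            print(obj['Key'])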
@dalmosantos

import boto3
import os
from datetime import datetime

prefix = 'abc_'
new_prefix = f"{prefix}{datetime.today().strftime('%Y-%m-%d')}"
suffix = 'csv.gz'
bucket_name = 'mybucketname'


def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.all():
        key = obj.key
        path_part = os.path.dirname(key)
        filename = os.path.basename(key)
        copy_source = {
            'Bucket': bucket_name,
            'Key': key
        }
        if filename.startswith(prefix) and filename.endswith(suffix):
            # Insert today's date right after the prefix,
            # e.g. 'abc_report.csv.gz' -> 'abc_<YYYY-MM-DD>report.csv.gz'
            new_filename = filename.replace(prefix, new_prefix, 1)
            full_key_with_path = os.path.join(path_part, new_filename)
            print(f'copying the object with new key : {full_key_with_path}')
            bucket.copy(copy_source, full_key_with_path)
            print(f'deleting old key : {key}')
            s3.Object(bucket_name, key).delete()
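
One caveat on the handler above: s3.Object(...).delete() issues one request per key, which adds up if a run renames thousands of objects. Deletes can be batched with delete_objects, which accepts up to 1,000 keys per call. A minimal sketch, with a hypothetical keys_to_delete list standing in for the old keys collected during the copy loop:

import boto3

s3_client = boto3.client('s3')

# Hypothetical: old keys gathered while copying, instead of deleting inline.
keys_to_delete = ['some_dir/abc_one.csv.gz', 'some_dir/abc_two.csv.gz']

# delete_objects takes at most 1,000 keys per request, so chunk the list.
for i in range(0, len(keys_to_delete), 1000):
    batch = keys_to_delete[i:i + 1000]
    s3_client.delete_objects(
        Bucket='mybucketname',
        Delete={'Objects': [{'Key': k} for k in batch]},
    )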
