Skip to content

Instantly share code, notes, and snippets.

@justinnaldzin
Last active June 10, 2020 04:39
Show Gist options
  • Save justinnaldzin/79f46eeb5818cd58134707612e90ac96 to your computer and use it in GitHub Desktop.
Save justinnaldzin/79f46eeb5818cd58134707612e90ac96 to your computer and use it in GitHub Desktop.
Listing objects, keys, versions, and delete markers in an S3 bucket.
import boto3
def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Lazily yield the object entries listed under one or more key prefixes.

    Each yielded item is one element of a ``list_objects_v2`` page's
    "Contents" list, passed through unchanged.

    :param bucket: Name of the S3 bucket.
    :param prefix: Key prefix to list under (optional). May be a single string,
        or a tuple/list of strings, in which case each prefix is listed in turn.
    :param suffix: Only yield entries whose key ends with this suffix (optional).
    """
    client = boto3.client("s3")
    pager = client.get_paginator("list_objects_v2")

    # A lone string is wrapped in a tuple so the loop below always walks a
    # collection of prefixes instead of the characters of one prefix.
    prefix_list = (prefix, ) if isinstance(prefix, str) else prefix

    # One request dict is reused; only "Prefix" changes between listings.
    request = {'Bucket': bucket}
    for current_prefix in prefix_list:
        request["Prefix"] = current_prefix
        for page in pager.paginate(**request):
            # A page without a "Contents" entry is treated as empty.
            for entry in page.get("Contents", []):
                if not entry["Key"].endswith(suffix):
                    continue
                yield entry
def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Lazily yield only the "Key" of every entry produced by
    get_matching_s3_objects for the same arguments.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only yield keys that start with this prefix (optional).
    :param suffix: Only yield keys that end with this suffix (optional).
    """
    matching_objects = get_matching_s3_objects(bucket, prefix=prefix, suffix=suffix)
    for s3_object in matching_objects:
        yield s3_object["Key"]
def get_s3_object_versions(bucket, prefix="", suffix=""):
    """
    Lazily yield object versions and delete markers listed under one or more
    key prefixes.

    For every ``list_object_versions`` page, the "Versions" entries are
    yielded first, followed by the "DeleteMarkers" entries. Entries whose
    VersionId is the string 'null' are skipped.

    :param bucket: Name of the S3 bucket.
    :param prefix: Key prefix to list under (optional). May be a single string,
        or a tuple/list of strings, in which case each prefix is listed in turn.
    :param suffix: Only yield entries whose key ends with this suffix (optional).
    """
    client = boto3.client("s3")
    pager = client.get_paginator('list_object_versions')

    # A lone string is wrapped in a tuple so the loop below always walks a
    # collection of prefixes instead of the characters of one prefix.
    prefix_list = (prefix, ) if isinstance(prefix, str) else prefix

    # One request dict is reused; only "Prefix" changes between listings.
    request = {'Bucket': bucket}
    for current_prefix in prefix_list:
        request["Prefix"] = current_prefix
        for page in pager.paginate(**request):
            # Either section may be missing from a page; treat it as empty.
            entries = [
                *page.get("Versions", []),
                *page.get("DeleteMarkers", []),
            ]
            for entry in entries:
                # NOTE(review): 'null' presumably marks objects stored without
                # versioning enabled - confirm that dropping them is intended.
                if not entry["Key"].endswith(suffix) or entry['VersionId'] == 'null':
                    continue
                yield entry
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment