Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Getting all objects in an S3 bucket (based on a prefix)
def get_files(bucket: str, prefix: str, max_files: int = 100000):
    """List the objects in an S3 bucket whose keys start with ``prefix``.

    Pages through ``s3_client.list_objects_v2`` (requesting up to 1000 keys
    per call) and follows continuation tokens until the listing is no longer
    reported as truncated or at least ``max_files`` entries were collected.

    Args:
        bucket: Name of the bucket to list.
        prefix: Key prefix passed to the listing call.
        max_files: Soft cap on the number of collected entries. It is only
            checked between pages, so the result may exceed it by up to one
            page. The first page is always requested.

    Returns:
        The concatenated ``Contents`` entries of every fetched page (an empty
        list when no page carries ``Contents``), or ``None`` if any request
        raised an exception.
    """
    files = []
    # Arguments shared by every page request; the continuation token is added
    # only after a response has supplied one.
    request = {"Bucket": bucket, "Prefix": prefix, "MaxKeys": 1000}
    try:
        while True:
            response = s3_client.list_objects_v2(**request)
            # extend() grows the list in place instead of re-copying all
            # previously collected entries on every page.
            files.extend(response.get("Contents", []))
            LOG.info(f"len(files): {len(files)}")
            if not response.get("IsTruncated") or len(files) >= max_files:
                return files
            request["ContinuationToken"] = response.get("NextContinuationToken")
    except Exception as ex:
        # Best-effort contract kept for existing callers: log the failure and
        # signal it with None rather than propagating the exception.
        LOG.error(f"{ex}")
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment