Skip to content

Instantly share code, notes, and snippets.

@l1x
Created July 21, 2022 12:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save l1x/52b2e54ef98e9acf8b3a1dde02c89e07 to your computer and use it in GitHub Desktop.
Save l1x/52b2e54ef98e9acf8b3a1dde02c89e07 to your computer and use it in GitHub Desktop.
Getting all objects in an S3 bucket (based on a prefix)
def get_files(bucket: str, prefix: str, max_files: int = 100000):
    """List the objects in an S3 bucket whose keys start with a prefix.

    Pages through ``s3_client.list_objects_v2`` (1000 keys per request),
    following the continuation token until the listing is no longer
    truncated or at least ``max_files`` entries have been collected.

    Args:
        bucket: Name of the bucket to list.
        prefix: Only keys beginning with this prefix are requested.
        max_files: Soft cap on the number of entries. It is checked between
            pages (the first page is always fetched), so the result can
            exceed it by up to one page.

    Returns:
        A list of the ``Contents`` entries from every page fetched
        (empty when no page carries any), or ``None`` if any request
        raised; the error is logged rather than propagated.
    """
    files = []
    # One shared request dict: the continuation token is added after the
    # first page, so the initial and follow-up calls use the same code path.
    request = {"Bucket": bucket, "Prefix": prefix, "MaxKeys": 1000}
    try:
        while True:
            response = s3_client.list_objects_v2(**request)
            # extend() grows the list in place instead of re-copying every
            # previously collected entry on each page.
            files.extend(response.get("Contents", []))
            LOG.info(f"len(files): {len(files)}")
            if not response.get("IsTruncated") or len(files) >= max_files:
                return files
            request["ContinuationToken"] = response.get("NextContinuationToken")
    except Exception as ex:
        # Best-effort contract kept from the original: log and signal
        # failure with None instead of raising to the caller.
        LOG.error(f"{ex}")
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment