List all public files within all S3 buckets on your AWS account
# This snippet is only practical for a small number of objects. Consider using S3 Inventory and Amazon Athena for bigger datasets.
from typing import Iterator

import boto3
import requests

s3 = boto3.client('s3')

all_buckets = [
    bucket_dict['Name'] for bucket_dict in
    s3.list_buckets()['Buckets']
]
def list_objs(bucket: str) -> Iterator[str]:
    """
    Generator yielding all object names in the bucket. Potentially requires
    multiple requests for large buckets since list_objects_v2 is capped at
    1000 objects returned per call.
    """
    response = s3.list_objects_v2(Bucket=bucket)
    while True:
        if 'Contents' not in response:
            # Happens if the bucket is empty
            return
        for obj_dict in response['Contents']:
            yield obj_dict['Key']
        last_key = obj_dict['Key']
        if response['IsTruncated']:
            # Resume listing after the last key already returned
            response = s3.list_objects_v2(Bucket=bucket, StartAfter=last_key)
        else:
            return
def is_public(bucket: str, region: str, obj: str) -> bool:
    # An unauthenticated HEAD request succeeds only if the object is publicly readable
    url = f'https://{bucket}.s3.{region}.amazonaws.com/{obj}'
    resp = requests.head(url)
    if resp.status_code == 200:
        return True
    elif resp.status_code == 403:
        return False
    else:
        raise Exception(f'Unexpected HTTP code {resp.status_code} from {url}')
for bucket in all_buckets:
    print(f'Bucket: {bucket}')
    region = s3.get_bucket_location(Bucket=bucket)['LocationConstraint']
    # get_bucket_location returns None for buckets in us-east-1
    region = region or 'us-east-1'
    for obj in list_objs(bucket):
        if is_public(bucket, region, obj):
            print(f'{bucket}/{obj} is public')
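
To run the snippet you need AWS credentials configured (for example via aws configure or environment variables), Python 3.6+ for the f-strings, and the boto3 and requests packages installed.

As the comment at the top notes, paging through list_objects_v2 does not scale to large buckets; S3 Inventory queried with Amazon Athena is the suggested alternative. Below is a minimal sketch of that route, assuming an inventory report has already been delivered and registered as an Athena table. The database s3_inventory, table bucket_inventory, and results bucket my-athena-results-bucket are placeholders, not names from the original snippet.

# Sketch only: assumes an S3 Inventory report is already exposed in Athena
# as s3_inventory.bucket_inventory (placeholder names).
import time

import boto3

athena = boto3.client('athena')

execution = athena.start_query_execution(
    QueryString='SELECT key FROM s3_inventory.bucket_inventory',
    QueryExecutionContext={'Database': 's3_inventory'},
    ResultConfiguration={'OutputLocation': 's3://my-athena-results-bucket/'},
)
query_id = execution['QueryExecutionId']

# Poll until the query finishes (simplified: no backoff or error details)
while True:
    state = athena.get_query_execution(
        QueryExecutionId=query_id)['QueryExecution']['Status']['State']
    if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
        break
    time.sleep(1)

if state == 'SUCCEEDED':
    rows = athena.get_query_results(QueryExecutionId=query_id)['ResultSet']['Rows']
    for row in rows[1:]:  # first row is the column header
        print(row['Data'][0]['VarCharValue'])

The keys returned by the query could then be fed into is_public() exactly as the listing loop above does. Note that get_query_results returns at most 1000 rows per call, so a full inventory would need a NextToken loop or a paginator.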