Skip to content

Instantly share code, notes, and snippets.

@bertrand-caron
Last active May 1, 2020 04:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bertrand-caron/e77c6b9364c90362f556fce5cf036880 to your computer and use it in GitHub Desktop.
Save bertrand-caron/e77c6b9364c90362f556fce5cf036880 to your computer and use it in GitHub Desktop.
List AWS buckets and their sizes
# Requires `awscli` (installed with pip) and a modern python version (>=3.6)
from datetime import datetime, timedelta
from subprocess import check_output
from json import loads, dumps
from typing import Tuple, NamedTuple
from multiprocessing import Pool
def sizeof_fmt(num: float, suffix: str = 'B') -> str:
    """Format *num* as a human-readable size using binary (IEC) prefixes.

    Walks up the prefix ladder, dividing by 1024 until the magnitude fits,
    e.g. 1536 -> '1.5KiB'. Values beyond the table fall through to 'Yi'.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        # abs() so negative quantities are scaled the same way.
        if abs(value) < 1024.0:
            return "%.1f%s%s" % (value, prefix, suffix)
        value /= 1024.0
    return "%.1f%s%s" % (value, 'Yi', suffix)
class BucketSize(NamedTuple):
    """Size record for one S3 bucket, as reported by CloudWatch."""
    bucket_name: str
    bucket_location: str  # AWS region, e.g. "us-east-1"
    bytes: int  # NOTE: shadows the builtin `bytes`; kept for caller compatibility
    human_readable_bytes: str  # e.g. "1.5GiB", produced by sizeof_fmt
def get_bucket_size(bucket_name: str) -> BucketSize:
    """Return the StandardStorage size of *bucket_name* via CloudWatch.

    Looks up the bucket's region, then queries the daily-averaged
    `BucketSizeBytes` metric over the last two days (CloudWatch publishes
    this metric roughly once per day, so a 2-day window guarantees at most
    a couple of datapoints).

    Raises:
        subprocess.CalledProcessError: if either AWS CLI call fails.
        KeyError: if the CLI output lacks the expected fields.
    """
    # Pass argv as a list with shell=False (the default) so bucket names
    # containing shell metacharacters cannot be interpreted by a shell.
    location_response = loads(check_output(
        ["aws", "s3api", "get-bucket-location", "--bucket", bucket_name],
    ))
    # AWS API returns null for "us-east-1" :(
    bucket_location = location_response["LocationConstraint"] or "us-east-1"

    end_time = datetime.now()
    start_time = end_time - timedelta(days=2)

    def format_datetime(d: datetime) -> str:
        # CloudWatch accepts ISO-8601-ish timestamps without timezone.
        return d.strftime("%Y-%m-%dT%H:%M:%S")

    response = loads(check_output(
        [
            "aws", "cloudwatch", "get-metric-statistics",
            "--namespace", "AWS/S3",
            "--statistics", "Average",
            "--region", bucket_location,
            "--metric-name", "BucketSizeBytes",
            "--dimensions",
            f"Name=BucketName,Value={bucket_name}",
            "Name=StorageType,Value=StandardStorage",
            "--start-time", format_datetime(start_time),
            "--end-time", format_datetime(end_time),
            "--period", "86400",
        ],
    ))

    datapoints = response["Datapoints"]
    if datapoints:
        size = datapoints[0]["Average"]
    else:
        # No metrics published; bucket is most likely empty.
        size = 0.
    return BucketSize(bucket_name, bucket_location, size, sizeof_fmt(size))
if __name__ == "__main__":
    # Enumerate every bucket in the account.
    buckets = loads(check_output("aws s3api list-buckets", shell=True))["Buckets"]
    bucket_names = [bucket["Name"] for bucket in buckets]

    # Fan out the per-bucket lookups: each one is a slow AWS CLI round-trip.
    with Pool(12) as pool:
        sizes = pool.map(get_bucket_size, bucket_names)

    # Emit one JSON object keyed by bucket name.
    report = {
        size.bucket_name: {
            "bytes": size.bytes,
            "human_readable_bytes": size.human_readable_bytes,
            "location": size.bucket_location,
        }
        for size in sizes
    }
    print(dumps(report, indent=True))
@bertrand-caron
Copy link
Author

Note that I am currently only pulling the data for one StorageClass (StandardStorage). This could easily be extended to read across all storage classes, although I could only find an API call that queries them one at a time — if anyone knows of an API for getting them all at once, please let me know! :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment