Last active
May 1, 2020 04:48
-
-
Save bertrand-caron/e77c6b9364c90362f556fce5cf036880 to your computer and use it in GitHub Desktop.
List AWS buckets and their sizes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires `awscli` (installed with pip) and a modern python version (>=3.6) | |
from datetime import datetime, timedelta | |
from subprocess import check_output | |
from json import loads, dumps | |
from typing import Tuple, NamedTuple | |
from multiprocessing import Pool | |
def sizeof_fmt(num: float, suffix: str = 'B') -> str:
    """Render a byte count as a human-readable string (e.g. 1536 -> '1.5KiB').

    Uses binary (1024-based) IEC prefixes. Values at or beyond 1024 ZiB
    fall through to the 'Yi' prefix. Negative values keep their sign.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return f"{num:.1f}{unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f}Yi{suffix}"
class BucketSize(NamedTuple):
    """Result record for a single S3 bucket's size lookup."""
    bucket_name: str  # S3 bucket name as returned by `aws s3api list-buckets`
    bucket_location: str  # AWS region; "us-east-1" when the API returns null
    bytes: int  # average of the BucketSizeBytes CloudWatch metric; NOTE: field name shadows the `bytes` builtin
    human_readable_bytes: str  # `bytes` formatted via sizeof_fmt, e.g. "1.5GiB"
def get_bucket_size(bucket_name: str) -> BucketSize:
    """Look up one bucket's size via the CloudWatch BucketSizeBytes metric.

    Makes two AWS CLI calls: one to resolve the bucket's region, one to
    fetch the daily-average size over the last two days. Only the
    StandardStorage storage class is queried.
    """
    # Use list-args (shell=False) so bucket names are passed verbatim and
    # cannot be interpreted by the shell.
    location_response = loads(check_output(
        ["aws", "s3api", "get-bucket-location", "--bucket", bucket_name]
    ))
    # The AWS API returns null for "us-east-1" :(
    bucket_location = location_response["LocationConstraint"] or "us-east-1"

    end_time = datetime.now()
    start_time = end_time - timedelta(days=2)

    def format_datetime(d: datetime) -> str:
        # CloudWatch accepts ISO-8601 timestamps without fractional seconds.
        return d.strftime("%Y-%m-%dT%H:%M:%S")

    response = loads(check_output([
        "aws", "cloudwatch", "get-metric-statistics",
        "--namespace", "AWS/S3",
        "--statistics", "Average",
        "--region", bucket_location,
        "--metric-name", "BucketSizeBytes",
        "--dimensions",
        f"Name=BucketName,Value={bucket_name}",
        "Name=StorageType,Value=StandardStorage",
        "--start-time", format_datetime(start_time),
        "--end-time", format_datetime(end_time),
        "--period", "86400",
    ]))

    datapoints = response["Datapoints"]
    # No datapoints means the bucket is most likely empty.
    size = datapoints[0]["Average"] if datapoints else 0.0
    return BucketSize(bucket_name, bucket_location, size, sizeof_fmt(size))
if __name__ == "__main__": | |
bucket_names = [ | |
bucket["Name"] | |
for bucket in loads(check_output("aws s3api list-buckets", shell=True))["Buckets"] | |
] | |
with Pool(12) as pool: | |
data = pool.map( | |
get_bucket_size, | |
bucket_names | |
) | |
print( | |
dumps( | |
{ | |
bucket_size.bucket_name: { | |
"bytes": bucket_size.bytes, | |
"human_readable_bytes": bucket_size.human_readable_bytes, | |
"location": bucket_size.bucket_location, | |
} | |
for bucket_size in data | |
}, | |
indent=True, | |
), | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note that I am currently only pulling the data for one StorageClass (StandardStorage); the script could easily be extended to read across all storage classes. I could only find an API call for querying one storage class at a time — if anyone knows of an API for getting them all at once, please let me know.