|
import sys |
|
import csv |
|
from collections import defaultdict |
|
import re |
|
|
|
# Path of the usage-report CSV, taken from the first command-line argument.
# Falls back to None when no argument was supplied so that importing this
# module (e.g. to reuse or test its functions) does not raise IndexError.
CSV_PATH = sys.argv[1] if len(sys.argv) > 1 else None
|
|
|
def load_csv(path=None):
    """Read the usage report CSV and keep only the rows this script aggregates.

    Args:
        path: Location of the CSV file. When omitted, the module-level
            ``CSV_PATH`` is used, which matches the original behavior.

    Returns:
        A list of dicts with the keys ``"bucket"`` (str), ``"usage_type"``
        (str) and ``"usage_value"`` (int), in file order.

    Raises:
        KeyError: If an expected column is missing from the header.
        ValueError: If a ``UsageValue`` cell is not an integer.
    """
    if path is None:
        path = CSV_PATH

    rows = []
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation requires for files passed to a reader.
    with open(path, newline="") as file:
        for row in csv.DictReader(file):
            # The column names carry a leading space; presumably the report
            # header is separated by ", " -- keep the keys verbatim.
            bucket = row[" Resource"]
            usage_type = row[" UsageType"]
            usage_value = int(row[" UsageValue"])

            # Rows without a resource cannot be attributed to a bucket.
            if not bucket:
                continue
            # Only data-transfer-out and request usage is reported.
            if "Out-Bytes" not in usage_type and "Requests" not in usage_type:
                continue

            rows.append(
                {
                    "bucket": bucket,
                    "usage_type": usage_type,
                    "usage_value": usage_value,
                }
            )
    return rows
|
|
|
# Based on: https://docs.aws.amazon.com/AmazonS3/latest/userguide/aws-usage-report-understand.html
#
# Ordered (pattern, label template) rules. The first pattern that matches the
# start of the usage type wins. Every pattern tolerates one optional
# "<word>-" prefix in front of the usage type proper.
_USAGE_TYPE_RULES = (
    # The amount of data transferred from Amazon S3 to the internet.
    (re.compile(r"(?:\w+-)?DataTransfer-Out-Bytes"), "DataTransfer-ToInternet"),
    # The amount of data transferred from AWS Region1 to AWS Region2.
    (re.compile(r"(?:\w+-)?(?P<region>\w+)-AWS-Out-Bytes"), "DataTransfer-To{region}"),
    # The amount of data transferred from Amazon S3 to Amazon EC2 within the
    # same AWS Region.
    (re.compile(r"(?:\w+-)?C3DataTransfer-Out-Bytes"), "DataTransfer-ToEC2-NoCharge"),
    # The amount of data transferred from an AWS Region to a CloudFront
    # distribution.
    (re.compile(r"(?:\w+-)?CloudFront-Out-Bytes"), "DataTransfer-ToCloudFront-NoCharge"),
    # Request counts. The suffix may itself contain hyphens (for example
    # "SIA-Tier1"), so capture all of it; stopping at the first hyphen would
    # merge distinct tiers such as "SIA-Tier1" and "SIA-Tier2" into one key.
    (re.compile(r"(?:\w+-)?Requests-(?P<tier>[\w-]+)"), "Requests-{tier}"),
    # Undocumented. From S3 Support: "Traffic went to an AWS IP, but the IP is
    # not included in any region's ranges. Because the destination region is
    # unknown, there is no charge."
    (re.compile(r"(?:\w+-)?AMZN-Out-Bytes"), "DataTransfer-ToAWS-NoCharge"),
)


def parse_usage_type(usage: str) -> str:
    """Map a raw usage type from the report to a normalized category name.

    Args:
        usage: The usage type string exactly as it appears in the report.

    Returns:
        The normalized category, e.g. ``"DataTransfer-ToInternet"``,
        ``"DataTransfer-ToUSW2"`` or ``"Requests-Tier1"``.

    Raises:
        ValueError: If ``usage`` matches none of the known patterns.
            (``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` keep working.)
    """
    for pattern, template in _USAGE_TYPE_RULES:
        match = pattern.match(usage)
        if match:
            # Templates without placeholders are returned unchanged.
            return template.format(**match.groupdict())
    raise ValueError(f"Unknown usage type '{usage}'")
|
|
|
def parse_report(csv):
    """Total the usage values per bucket and per normalized usage category.

    Args:
        csv: Iterable of row dicts with the keys ``"bucket"``,
            ``"usage_value"`` and ``"usage_type"``. Note that inside this
            function the parameter name hides the ``csv`` module.

    Returns:
        A ``defaultdict`` keyed by bucket whose values are ``defaultdict(int)``
        mappings from the category returned by ``parse_usage_type`` to the
        summed usage value.
    """
    def _new_counter():
        # Inner mapping: missing categories start at 0.
        return defaultdict(int)

    totals = defaultdict(_new_counter)
    for entry in csv:
        name = entry["bucket"]
        amount = entry["usage_value"]
        category = parse_usage_type(entry["usage_type"])
        totals[name][category] += amount
    return totals
|
|
|
def print_report(report):
    """Write the aggregated report to standard output as comma-separated text.

    A fixed header line is printed first, followed by one line per
    (bucket, usage type) pair in the iteration order of the mappings.

    Args:
        report: Mapping of bucket to a mapping of usage type to usage amount.
    """
    print("Bucket,UsageType,UsageAmount")
    # Lazy generator: each line is formatted only right before it is printed.
    lines = (
        f"{bucket},{usage_type},{usage_amount}"
        for bucket, usages in report.items()
        for usage_type, usage_amount in usages.items()
    )
    for line in lines:
        print(line)
|
|
|
def main():
    """Load the usage report, aggregate it per bucket and print the summary."""
    # Keep the rows in a local name: assigning them to a module-level ``csv``
    # would rebind the imported ``csv`` module to a list.
    rows = load_csv()
    report = parse_report(rows)
    print_report(report)


if __name__ == "__main__":
    main()