Skip to content

Instantly share code, notes, and snippets.

@gurchik
Last active June 2, 2023 02:59
Show Gist options
  • Save gurchik/071ef68d52d64dea08dae4f1afd168a0 to your computer and use it in GitHub Desktop.
Save gurchik/071ef68d52d64dea08dae4f1afd168a0 to your computer and use it in GitHub Desktop.
Analyze S3 Usage Report

Analyze S3 Usage Report

Format an AWS Usage Report for S3 charges to make it simpler to compare your usage to the public S3 Pricing page.

For example, the pricing page lists Tier 1 (i.e. PUT, COPY, POST, and LIST) requests at one cost and Tier 2 (GET, SELECT, and all others) requests at a different cost. However, the Usage Report lists requests by API call (e.g. GetObject, ListBucket, etc). This is a bit annoying, so this script parses them into their respective billing tiers.

Instructions

  1. Navigate to AWS Billing. Select "Cost & usage reports" then "Create a usage report"
  2. Enter "Amazon Simple Storage Service" as the Service, "All usage types" for the Usage Type, "All Operations" for the Operations. Then select a time period and granularity.
  3. Download the report as a CSV.
  4. Run `tail -1 <FILE>`. If the output is "The report for the period and values you specified was too large..." then you must select a shorter time period or a larger granularity (or both) and re-download the file.
  5. Run `python3 analyze_s3_usage_report.py <FILE>`
import sys
import csv
from collections import defaultdict
import re
# Path to the usage-report CSV file, taken from the first command-line
# argument. Evaluated at import time, so running without an argument
# raises IndexError here.
CSV_PATH = sys.argv[1]
def load_csv(path=None):
    """Load the AWS usage-report CSV and return the billable rows.

    Args:
        path: Path to the CSV file. Defaults to CSV_PATH (the first
            command-line argument) so existing callers are unaffected.

    Returns:
        A list of dicts with keys "bucket", "usage_type" and "usage_value",
        keeping only rows that have a Resource (bucket name) and whose
        UsageType is either an Out-Bytes (data transfer) or a Requests entry.

    NOTE: the column keys carry a leading space (" Resource", ...) —
    presumably the report's header line has a space after each comma.
    """
    if path is None:
        path = CSV_PATH
    # (Fixed: the original also assigned an unused `path = sys.argv[1]`
    # local that duplicated CSV_PATH.)
    rows = []
    # newline="" is the csv-module-recommended way to open CSV files.
    with open(path, newline="") as file:
        for row in csv.DictReader(file):
            parsed = {
                "bucket": row[" Resource"],
                "usage_type": row[" UsageType"],
                "usage_value": int(row[" UsageValue"]),
            }
            # Skip rows with no bucket in the Resource column.
            if not parsed["bucket"]:
                continue
            # Keep only data-transfer (Out-Bytes) and request-count rows;
            # everything else (e.g. TimedStorage-ByteHrs) is ignored.
            if "Out-Bytes" not in parsed["usage_type"] and "Requests" not in parsed["usage_type"]:
                continue
            rows.append(parsed)
    return rows
def parse_usage_type(usage):
    """Map a raw UsageType string to a human-friendly billing category.

    Based on:
    https://docs.aws.amazon.com/AmazonS3/latest/userguide/aws-usage-report-understand.html

    Raises an Exception for any UsageType not covered by the rule table.
    """
    # Ordered (regex, category-builder) rules; the first match wins.
    rules = (
        # The amount of data transferred from Amazon S3 to the internet.
        (r'(\w+-)?DataTransfer-Out-Bytes',
         lambda m: "DataTransfer-ToInternet"),
        # The amount of data transferred from AWS Region1 to AWS Region2.
        (r'(\w+-)?(\w+)-AWS-Out-Bytes',
         lambda m: f"DataTransfer-To{m.group(2)}"),
        # The amount of data transferred from Amazon S3 to Amazon EC2
        # within the same AWS Region.
        (r'(\w+-)?C3DataTransfer-Out-Bytes',
         lambda m: "DataTransfer-ToEC2-NoCharge"),
        # The amount of data transferred from an AWS Region to a
        # CloudFront distribution.
        (r'(\w+-)?CloudFront-Out-Bytes',
         lambda m: "DataTransfer-ToCloudFront-NoCharge"),
        # Request counts, bucketed by billing tier.
        (r'(\w+-)?Requests-(\w+)',
         lambda m: f"Requests-{m.group(2)}"),
        # Undocumented. From S3 Support: "Traffic went to an AWS IP, but the
        # IP is not included in any region's ranges. Because the destination
        # region is unknown, there is no charge."
        (r'(\w+-)?AMZN-Out-Bytes',
         lambda m: "DataTransfer-ToAWS-NoCharge"),
    )
    for pattern, build in rules:
        match = re.match(pattern, usage)
        if match:
            return build(match)
    raise Exception(f"Unknown usage type '{usage}'")
def parse_report(csv):
    """Aggregate parsed usage rows into {bucket: {billing_category: total}}.

    Each input row is a dict with "bucket", "usage_type" and "usage_value"
    keys (as produced by load_csv); usage values for the same bucket and
    billing category are summed.
    """
    totals = defaultdict(lambda: defaultdict(int))
    for entry in csv:
        category = parse_usage_type(entry["usage_type"])
        totals[entry["bucket"]][category] += entry["usage_value"]
    return totals
def print_report(report):
    """Write the aggregated report to stdout as CSV (one line per
    bucket/usage-type pair, preceded by a header line)."""
    print("Bucket,UsageType,UsageAmount")
    lines = (
        f"{bucket},{usage_type},{amount}"
        for bucket, usages in report.items()
        for usage_type, amount in usages.items()
    )
    for line in lines:
        print(line)
def _main():
    """Script entry point: load the usage CSV, aggregate it, print the summary."""
    # (Fixed: the original bound the loaded rows to a module-level name
    # `csv`, shadowing the imported csv module.)
    rows = load_csv()
    report = parse_report(rows)
    print_report(report)


if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment