Skip to content

Instantly share code, notes, and snippets.

@ktstevenson
Created September 1, 2017 17:30
Show Gist options
  • Save ktstevenson/dd4c87288047b9c1a21e7ba24fd5aaac to your computer and use it in GitHub Desktop.
Save ktstevenson/dd4c87288047b9c1a21e7ba24fd5aaac to your computer and use it in GitHub Desktop.
Normalize a DLT billing file. Removes $0 and summary lines. Exports a TSV file suitable for processing by Athena
import sys
import csv
# Path of the input CSV file, taken from the first command-line argument.
# Evaluated at startup: raises IndexError if the script is run with no argument.
INPUT_FILE = sys.argv[1]
# Canonical column list: every normalized record is padded so that each of
# these columns is present. One name per line so additions diff cleanly.
fields = [
    # Invoice / account / line-item columns
    'InvoiceID',
    'PayerAccountId',
    'LinkedAccountId',
    'RecordType',
    'RecordId',
    'ProductName',
    'RateId',
    'SubscriptionId',
    'PricingPlanId',
    'UsageType',
    'Operation',
    'AvailabilityZone',
    'ReservedInstance',
    'ItemDescription',
    'UsageStartDate',
    'UsageEndDate',
    'UsageQuantity',
    'BlendedRate',
    'BlendedCost',
    'UnBlendedRate',
    'UnBlendedCost',
    'ResourceId',
    # 'user:'-prefixed columns (presumably user-defined tags)
    'user:Application',
    'user:Company',
    'user:Contract',
    'user:CostCode',
    'user:Creation Date',
    'user:Creator',
    'user:Department',
    'user:DeptCode',
    'user:Environment',
    'user:Grant',
    'user:Location',
    'user:Name',
    'user:OU',
    'user:Order',
    'user:Organization',
    'user:Owner',
    'user:Payer',
    'user:Product',
    'user:Project',
    'user:ProjectName',
    'user:ProjectNumber',
    'user:ProjectType',
    'user:Purpose',
    'user:ResponsibleParty',
    'user:Role',
    'user:Service',
    'user:Status',
    'user:Use',
    # Numbered 'user:CAnnn' columns
    'user:CA001',
    'user:CA002',
    'user:CA003',
    'user:CA004',
    'user:CA005',
    'user:CA006',
    'user:CA007',
    'user:CA008',
    'user:CA009',
    'user:CA010',
]
def _open_csv(path):
    """Open *path* for the csv module on either Python 2 or Python 3.

    Python 3's csv module needs a text-mode file opened with newline='';
    Python 2's needs a binary-mode file.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'r', newline='')
    return open(path, 'rb')


def _is_billable(row):
    """Return True if *row* has a non-zero UnBlendedCost and an Operation.

    Rows with a zero cost ($0 items) or an empty Operation (summary lines)
    are rejected. A blank or missing cost cell is treated as zero rather
    than aborting the run; a non-blank, non-numeric cost still raises
    ValueError. Raises KeyError if the row has no 'UnBlendedCost' or
    'Operation' key at all.
    """
    raw_cost = (row['UnBlendedCost'] or '').strip()
    cost = float(raw_cost) if raw_cost else 0.0
    return bool(cost) and bool(row['Operation'])


def normalize_rows(rows, columns):
    """Yield one tab-separated line for each billable row in *rows*.

    Args:
        rows: iterable of dicts mapping column name to string value (for
            example a csv.DictReader).
        columns: the canonical column names.

    Every yielded line has exactly one value per entry of *columns*, in
    sorted column-name order. Columns absent from a row, and cells that are
    None (csv.DictReader's filler for short rows), become empty strings.
    Keys of a row that are not in *columns* are ignored, so the layout of
    the output never depends on which extra columns the input carries.
    """
    # The column order is the same for every row, so sort only once.
    ordered = sorted(columns)
    for row in rows:
        if not _is_billable(row):
            continue
        yield '\t'.join(row.get(column) or '' for column in ordered)


def main():
    """Normalize INPUT_FILE and print the resulting TSV lines to stdout."""
    with _open_csv(INPUT_FILE) as f:
        reader = csv.DictReader(f)
        # Columns outside the canonical list are not emitted; say so on
        # stderr so the omission is visible without polluting the TSV.
        extras = sorted(set(reader.fieldnames or ()) - set(fields))
        if extras:
            sys.stderr.write(
                'warning: dropping non-canonical columns: %s\n'
                % ', '.join(extras))
        for line in normalize_rows(reader, fields):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment