Send AWS Cost & Usage reports to Honeycomb
package main | |
import (
	"bytes"
	"compress/gzip"
	"encoding/csv"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"sync"
	"time"

	libhoney "github.com/honeycombio/libhoney-go"
	flag "github.com/jessevdk/go-flags"
)
// An example of a script used to send the contents of an AWS Cost and Usage Report to Honeycomb | |
// - overview: https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage.html | |
// - fields reference: https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage-details.html | |
// | |
// USAGE: | |
// go run main.go -k {YOUR_HONEYCOMB_API_KEY} -d "AWS Cost and Usage" -f "{report file path}" -V | |
// | |
// NOTE: neither Honeycomb nor this script has any ability to dedupe line items
// sent in previous runs. Make sure you only send line items that are not already
// in Honeycomb and are no longer subject to change (e.g. from the prior month).
// Options holds the command-line flags understood by this script. The struct
// tags are read by the go-flags parser that main builds around the
// package-level options variable.
//
//   - WriteKey, Dataset and APIHost are passed straight into the libhoney
//     configuration.
//   - StartTime, when non-empty, is parsed with timeFormat and used to skip
//     line items whose usage start is not after it.
//   - Verbose enables progress and error output.
//   - File lists the gzip-compressed CSV report files to send.
//
// NOTE(review): FilePrefix is not read anywhere in the code visible here, and
// nothing walks the filesystem when -f is absent — confirm whether the help
// text for -p and -f is still accurate.
type Options struct {
	WriteKey   string   `short:"k" long:"writekey" description:"your Honeycomb API key."`
	Dataset    string   `short:"d" long:"dataset" description:"dataset name."`
	FilePrefix string   `short:"p" long:"file_prefix" description:"the name of your report file without pagination numbers, e.g. 'myreport' and not 'myreport1-1.csv'"`
	APIHost    string   `long:"api_host" description:"hostname for api.honeycomb.io."`
	StartTime  string   `long:"start_time" description:"only import records with a TimeInterval that starts after this date. Useful for combining this script with cron to update a particular dataset regularly with only the newest line items. Uses time format 2006-01-02T15:04:05Z07:00"`
	Verbose    bool     `short:"V" long:"verbose" description:"verbose output"`
	File       []string `short:"f" description:"csv file to examine. if absent, does file walk. May be specified multiple times to consume multiple files simultaneously"`
}
// Column names from the Cost and Usage Report CSV header row.
const (
	// timeField is the column whose value becomes the event timestamp.
	timeField = "lineItem/UsageStartDate"
	costField = "lineItem/BlendedCost"

	identityTimeKey = "identity/TimeInterval"
	identityIDKey   = "identity/LineItemId"
)

// timeFormat is the layout used to parse both the timeField column and the
// --start_time flag; it is the same layout string as time.RFC3339.
const timeFormat = "2006-01-02T15:04:05Z07:00"
var options Options | |
var recordCount int64 | |
func main() { | |
flagParser := flag.NewParser(&options, flag.Default) | |
if extraArgs, err := flagParser.Parse(); err != nil || len(extraArgs) != 0 { | |
fmt.Printf("error parsing command line. use --help for help.") | |
os.Exit(1) | |
} | |
// Configure the Honeycomb SDK. We'll use this to send events to Honeycomb. | |
libhoney.Init(libhoney.Config{ | |
WriteKey: options.WriteKey, | |
Dataset: options.Dataset, | |
APIHost: options.APIHost, | |
BlockOnSend: true, | |
MaxConcurrentBatches: 5, // Increase to run faster, decrease if you hit rate limits | |
}) | |
var err error | |
for _, fn := range options.File { | |
if options.Verbose { | |
fmt.Printf("crunching file %s\n", fn) | |
} | |
if err = crunchFile(fn); err != nil { | |
break | |
} | |
} | |
// Handle sending to Honeycomb | |
rs := libhoney.Responses() | |
wg := sync.WaitGroup{} | |
wg.Add(1) | |
go func() { | |
for resp := range rs { | |
if options.Verbose { | |
if resp.StatusCode != 200 && resp.StatusCode != 202 { | |
fmt.Printf("returned %d in %v with reason %s\n", resp.StatusCode, resp.Duration, string(resp.Body)) | |
} | |
} | |
} | |
wg.Done() | |
}() | |
libhoney.Close() | |
wg.Wait() | |
if options.Verbose { | |
fmt.Println("Sent %i records", recordCount) | |
} | |
} | |
func crunchFile(path string) error { | |
dat, err := os.Open(path) | |
if err != nil { | |
fmt.Println("whoops os.Open", err) | |
os.Exit(1) | |
} | |
decompressed, err := gzip.NewReader(dat) | |
if err != nil { | |
fmt.Println("whoops gzip.NewReader", err) | |
os.Exit(1) | |
} | |
r := csv.NewReader(decompressed) | |
// first record (column headers) | |
columns, err := r.Read() | |
if err != nil { | |
fmt.Println("whoops r.read", err) | |
os.Exit(1) | |
} | |
// rest of the records | |
for i := 0; true; i++ { | |
ev := libhoney.NewEvent() | |
record, err := r.Read() | |
var evTime time.Time | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
log.Fatal(err) | |
} | |
for i, colname := range columns { | |
// Set the special Honeycomb Timestamp field to timestamp your events | |
if colname == timeField { | |
evTime, err = time.Parse(timeFormat, record[i]) | |
if err == nil { | |
ev.Timestamp = evTime | |
continue | |
} else { | |
fmt.Println("derp i don't understand your timestamps", err) | |
os.Exit(1) | |
} | |
} | |
// cast the field to int or float if possible | |
if val, err := strconv.Atoi(record[i]); err == nil { | |
ev.AddField(colname, val) | |
} else if val, err := strconv.ParseFloat(record[i], 64); err == nil { | |
ev.AddField(colname, val) | |
} else { | |
// add it as a string | |
ev.AddField(colname, record[i]) | |
} | |
} | |
var startTime time.Time | |
if options.StartTime != "" { | |
startTime, err = time.Parse(timeFormat, options.StartTime) | |
} | |
if err != nil { | |
fmt.Println("oh no, i don't understand your start_time", err) | |
os.Exit(1) | |
} | |
if evTime.After(startTime) { | |
if options.Verbose { | |
if i%1000 == 0 { | |
fmt.Printf("fields: %v\n", ev.Fields()) | |
fmt.Println(record) | |
} | |
} | |
ev.Send() | |
recordCount++ | |
} | |
} | |
return nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment