Last active
September 23, 2020 16:19
-
-
Save eanakashima/fd794d46012bbe89326397aeea69e0ac to your computer and use it in GitHub Desktop.
Send AWS Cost & Usage reports to Honeycomb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bytes"
	"compress/gzip"
	"encoding/csv"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"sync"
	"time"

	libhoney "github.com/honeycombio/libhoney-go"
	flag "github.com/jessevdk/go-flags"
)
// An example of a script used to send the contents of an AWS Cost and Usage Report to Honeycomb | |
// - overview: https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage.html | |
// - fields reference: https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/billing-reports-costusage-details.html | |
// | |
// USAGE: | |
// go run main.go -k {YOUR_HONEYCOMB_API_KEY} -d "AWS Cost and Usage" -f "{report file path}" -V | |
// | |
// NOTE: neither Honeycomb nor this script has any ability to dedupe line items
// sent in previous runs. Make sure you only send line items that are not already
// in Honeycomb and are no longer subject to change (e.g. from the prior month).
// Options holds the command-line flags. The struct tags are consumed by the
// go-flags parser created in main, which fills in the package-level options
// variable.
//
// NOTE(review): FilePrefix is declared but not read anywhere in this file;
// only the files named with -f are processed.
type Options struct {
	WriteKey   string   `short:"k" long:"writekey" description:"your Honeycomb API key."`
	Dataset    string   `short:"d" long:"dataset" description:"dataset name."`
	FilePrefix string   `short:"p" long:"file_prefix" description:"the name of your report file without pagination numbers, e.g. 'myreport' and not 'myreport1-1.csv'"`
	APIHost    string   `long:"api_host" description:"hostname for api.honeycomb.io."`
	StartTime  string   `long:"start_time" description:"only import records with a TimeInterval that starts after this date. Useful for combining this script with cron to update a particular dataset regularly with only the newest line items. Uses time format 2006-01-02T15:04:05Z07:00"`
	Verbose    bool     `short:"V" long:"verbose" description:"verbose output"`
	File       []string `short:"f" description:"gzip-compressed csv report file to import. May be specified multiple times; files are processed one after another"`
}
// Layout used to parse both the per-row usage start date and the
// --start_time flag. It is the same layout string as time.RFC3339.
const timeFormat = "2006-01-02T15:04:05Z07:00"

// Column names from the report's header row.
const (
	// timeField is the column whose value becomes the event timestamp.
	timeField = "lineItem/UsageStartDate"

	// The remaining names are declared for reference; nothing in this file
	// reads them.
	costField       = "lineItem/BlendedCost"
	identityIDKey   = "identity/LineItemId"
	identityTimeKey = "identity/TimeInterval"
)
var options Options | |
var recordCount int64 | |
func main() { | |
flagParser := flag.NewParser(&options, flag.Default) | |
if extraArgs, err := flagParser.Parse(); err != nil || len(extraArgs) != 0 { | |
fmt.Printf("error parsing command line. use --help for help.") | |
os.Exit(1) | |
} | |
// Configure the Honeycomb SDK. We'll use this to send events to Honeycomb. | |
libhoney.Init(libhoney.Config{ | |
WriteKey: options.WriteKey, | |
Dataset: options.Dataset, | |
APIHost: options.APIHost, | |
BlockOnSend: true, | |
MaxConcurrentBatches: 5, // Increase to run faster, decrease if you hit rate limits | |
}) | |
var err error | |
for _, fn := range options.File { | |
if options.Verbose { | |
fmt.Printf("crunching file %s\n", fn) | |
} | |
if err = crunchFile(fn); err != nil { | |
break | |
} | |
} | |
// Handle sending to Honeycomb | |
rs := libhoney.Responses() | |
wg := sync.WaitGroup{} | |
wg.Add(1) | |
go func() { | |
for resp := range rs { | |
if options.Verbose { | |
if resp.StatusCode != 200 && resp.StatusCode != 202 { | |
fmt.Printf("returned %d in %v with reason %s\n", resp.StatusCode, resp.Duration, string(resp.Body)) | |
} | |
} | |
} | |
wg.Done() | |
}() | |
libhoney.Close() | |
wg.Wait() | |
if options.Verbose { | |
fmt.Println("Sent %i records", recordCount) | |
} | |
} | |
func crunchFile(path string) error { | |
dat, err := os.Open(path) | |
if err != nil { | |
fmt.Println("whoops os.Open", err) | |
os.Exit(1) | |
} | |
decompressed, err := gzip.NewReader(dat) | |
if err != nil { | |
fmt.Println("whoops gzip.NewReader", err) | |
os.Exit(1) | |
} | |
r := csv.NewReader(decompressed) | |
// first record (column headers) | |
columns, err := r.Read() | |
if err != nil { | |
fmt.Println("whoops r.read", err) | |
os.Exit(1) | |
} | |
// rest of the records | |
for i := 0; true; i++ { | |
ev := libhoney.NewEvent() | |
record, err := r.Read() | |
var evTime time.Time | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
log.Fatal(err) | |
} | |
for i, colname := range columns { | |
// Set the special Honeycomb Timestamp field to timestamp your events | |
if colname == timeField { | |
evTime, err = time.Parse(timeFormat, record[i]) | |
if err == nil { | |
ev.Timestamp = evTime | |
continue | |
} else { | |
fmt.Println("derp i don't understand your timestamps", err) | |
os.Exit(1) | |
} | |
} | |
// cast the field to int or float if possible | |
if val, err := strconv.Atoi(record[i]); err == nil { | |
ev.AddField(colname, val) | |
} else if val, err := strconv.ParseFloat(record[i], 64); err == nil { | |
ev.AddField(colname, val) | |
} else { | |
// add it as a string | |
ev.AddField(colname, record[i]) | |
} | |
} | |
var startTime time.Time | |
if options.StartTime != "" { | |
startTime, err = time.Parse(timeFormat, options.StartTime) | |
} | |
if err != nil { | |
fmt.Println("oh no, i don't understand your start_time", err) | |
os.Exit(1) | |
} | |
if evTime.After(startTime) { | |
if options.Verbose { | |
if i%1000 == 0 { | |
fmt.Printf("fields: %v\n", ev.Fields()) | |
fmt.Println(record) | |
} | |
} | |
ev.Send() | |
recordCount++ | |
} | |
} | |
return nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment