Skip to content

Instantly share code, notes, and snippets.

@ajbouh
Created January 20, 2022 23:21
Show Gist options
  • Save ajbouh/462b1126e28f2755aeaf085d9b61ed17 to your computer and use it in GitHub Desktop.
Save ajbouh/462b1126e28f2755aeaf085d9b61ed17 to your computer and use it in GitHub Desktop.
diff --git a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go
index 58c05607..2a5ce2a7 100644
--- a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go
+++ b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go
@@ -3,10 +3,12 @@ package main
import (
"bufio"
"compress/gzip"
+ "encoding/json"
"io"
"os"
"strconv"
"strings"
+ "time"
"github.com/aws/aws-lambda-go/events"
"github.com/aws/aws-lambda-go/lambda"
@@ -17,6 +19,7 @@ import (
"github.com/honeycombio/honeytail/httime"
"github.com/honeycombio/honeytail/parsers"
"github.com/honeycombio/libhoney-go"
+ "github.com/honeycombio/urlshaper"
"github.com/sirupsen/logrus"
)
@@ -28,9 +31,105 @@ type Response struct {
var parser parsers.LineParser
var parserType, timeFieldName, timeFieldFormat, env string
+var durationFieldName, durationEndFieldName, durationEndFieldFormat, parsedFieldNamePrefix string
+var requestFieldName string
+var fieldNameAliases map[string]string
+var amznTraceIdFieldName string
var matchPatterns, filterPatterns []string
var bufferSize uint
var forceGunzip bool
+var shaper requestShaper
+
+// parseLine runs line through the package-level parser. When
+// parsedFieldNamePrefix is non-empty and parsing succeeded, it returns a new
+// map holding the parsed fields with that prefix prepended to every key;
+// otherwise the parser's own result (and error) is returned as-is.
+func parseLine(line string) (map[string]interface{}, error) {
+	fields, err := parser.ParseLine(line)
+	if err != nil {
+		return fields, err
+	}
+	if parsedFieldNamePrefix == "" {
+		return fields, nil
+	}
+
+	// Build a fresh map rather than renaming keys in place while iterating.
+	prefixed := make(map[string]interface{}, len(fields))
+	for name, value := range fields {
+		prefixed[parsedFieldNamePrefix+name] = value
+	}
+	return prefixed, nil
+}
+
+func processDuration(m map[string]interface{}, timestamp time.Time, durationEndFieldName, durationEndFieldFormat string) {
+ var durationMs float64
+
+ if tm, foundDurationEndField := httime.FindTimestamp(m, durationEndFieldName, durationEndFieldFormat); foundDurationEndField != "" {
+ duration := tm.Sub(timestamp)
+ if duration > 0 {
+ durationMs = float64(duration / time.Millisecond)
+ }
+ }
+ if durationMs > 0.0 {
+ m["duration_ms"] = durationMs
+ }
+}
+
+// parseAmznTraceIdValue extracts the identifier from a value of the form
+// "1-<second part>-<id>". It returns the id and true only when the value has
+// all three dash-separated parts, the first part is "1", and the id is
+// non-empty; otherwise it returns "" and false. Because the split is limited
+// to three parts, any further dashes remain part of the id.
+func parseAmznTraceIdValue(value string) (string, bool) {
+	parts := strings.SplitN(value, "-", 3)
+	if len(parts) != 3 || parts[0] != "1" {
+		return "", false
+	}
+
+	id := parts[2]
+	return id, id != ""
+}
+
+// processAmznTraceIdHeader parses the X-Amzn-Trace-Id header value stored in
+// m[amznTraceFieldName], if it is present in an ALB access log, and writes the
+// tracing fields it finds back into m.
+//
+// Based on https://github.com/honeycombio/honeyaws/blob/1a182a1176d7741c19b2d560dc8614ee7763612e/publisher/publisher.go#L200-L215
+// See
+// https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html
+// for reference.
+//
+// The value is split on ";" into key=value pairs, handled as follows:
+//   - Root sets "trace.trace_id"
+//   - Self sets "trace.span_id"
+//   - Parent sets "trace.parent_id", unless edgeMode is true
+//   - Sampled sets "sampled"
+//   - any other key is copied into m under its own name
+//
+// Pairs that do not split into exactly two parts on "=" are skipped. If
+// neither Self nor (outside edge mode) Parent was seen, the event is treated
+// as the root span and "trace.span_id" is set to the trace id. Nothing happens
+// when the field is missing or is not a string.
+func processAmznTraceIdHeader(m map[string]interface{}, edgeMode bool, amznTraceFieldName string) {
+	amznTraceID, ok := m[amznTraceFieldName].(string)
+	if !ok {
+		return
+	}
+
+	rootSpan := true
+	for _, field := range strings.Split(amznTraceID, ";") {
+		kv := strings.Split(field, "=")
+		// something we don't expect
+		if len(kv) != 2 {
+			continue
+		}
+		key, val := kv[0], kv[1]
+		switch key {
+		case "Root":
+			if id, ok := parseAmznTraceIdValue(val); ok {
+				m["trace.trace_id"] = id
+			}
+		case "Self":
+			if id, ok := parseAmznTraceIdValue(val); ok {
+				m["trace.span_id"] = id
+			}
+			rootSpan = false
+		case "Parent":
+			// if we're running in "edge mode", ignore the parent id. We want
+			// the load balancer to always be the root of the trace.
+			if !edgeMode {
+				if id, ok := parseAmznTraceIdValue(val); ok {
+					m["trace.parent_id"] = id
+				}
+				rootSpan = false
+			}
+		case "Sampled":
+			m["sampled"] = val
+		default:
+			m[key] = val
+		}
+	}
+
+	if !rootSpan {
+		return
+	}
+	// The header may carry no usable Root (e.g. only "Sampled=1"), in which
+	// case "trace.trace_id" is absent; an unchecked type assertion would panic
+	// on the nil interface, so only copy the id when it is really a string.
+	if traceID, ok := m["trace.trace_id"].(string); ok {
+		m["trace.span_id"] = traceID
+	}
+}
func Handler(request events.S3Event) (Response, error) {
sess := session.Must(session.NewSession(&aws.Config{
@@ -68,7 +167,7 @@ func Handler(request events.S3Event) (Response, error) {
"key": record.S3.Object.Key,
}).Info("parser checkpoint")
}
- parsedLine, err := parser.ParseLine(scanner.Text())
+ parsedLine, err := parseLine(scanner.Text())
if err != nil {
logrus.WithError(err).WithField("line", scanner.Text()).
Warn("failed to parse line")
@@ -79,9 +178,26 @@ func Handler(request events.S3Event) (Response, error) {
}
hnyEvent := libhoney.NewEvent()
- timestamp := httime.GetTimestamp(parsedLine, timeFieldName, timeFieldFormat)
+ timestamp, _ := httime.FindTimestamp(parsedLine, timeFieldName, timeFieldFormat)
hnyEvent.Timestamp = timestamp
+ if amznTraceIdFieldName != "" {
+ processAmznTraceIdHeader(parsedLine, false, amznTraceIdFieldName)
+ }
+ if durationEndFieldName != "" {
+ processDuration(parsedLine, timestamp, durationEndFieldName, durationEndFieldFormat)
+ }
+
+ if requestFieldName != "" {
+ shaper.Shape(requestFieldName, parsedLine)
+ }
+
+ for newFieldName, existingFieldName := range fieldNameAliases {
+ if v, ok := parsedLine[existingFieldName]; ok {
+ parsedLine[newFieldName] = v
+ }
+ }
+
// convert ints and floats if necessary
if parserType != "json" {
hnyEvent.Add(common.ConvertTypes(parsedLine))
@@ -133,9 +249,29 @@ func main() {
common.AddUserAgentMetadata("s3", parserType)
env = os.Getenv("ENVIRONMENT")
+
timeFieldName = os.Getenv("TIME_FIELD_NAME")
timeFieldFormat = os.Getenv("TIME_FIELD_FORMAT")
+ amznTraceIdFieldName = os.Getenv("AMZN_TRACE_ID_FIELD_NAME")
+
+ durationEndFieldName = os.Getenv("DURATION_END_FIELD_NAME")
+ durationEndFieldFormat = os.Getenv("DURATION_END_FORMAT")
+
+ fieldNameAliases = map[string]string{}
+ fieldNameAliasJson := os.Getenv("FIELD_NAME_ALIAS_JSON")
+ if fieldNameAliasJson != "" {
+ if err = json.Unmarshal([]byte(fieldNameAliasJson), &fieldNameAliases); err != nil {
+ logrus.WithError(err).WithField("FIELD_NAME_ALIAS_JSON", fieldNameAliasJson).
+ Fatal("unable to parse FIELD_NAME_ALIAS_JSON")
+ return
+ }
+ }
+
+ parsedFieldNamePrefix = os.Getenv("PARSED_FIELD_NAME_PREFIX")
+
+ requestFieldName = os.Getenv("REQUEST_FIELD_NAME")
+
matchPatterns = []string{".*"}
filterPatterns = []string{}
bufferSize = 1024 * 64
@@ -157,6 +293,8 @@ func main() {
forceGunzip = true
}
+ shaper = requestShaper{&urlshaper.Parser{}}
+
lambda.Start(Handler)
}
diff --git a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go
new file mode 100644
index 00000000..b46cf70a
--- /dev/null
+++ b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go
@@ -0,0 +1,54 @@
+package main
+
+import (
+ "strings"
+
+ "github.com/honeycombio/urlshaper"
+ "github.com/sirupsen/logrus"
+)
+
+// requestShaper breaks a logged request field into its components; pr is the
+// urlshaper parser that Shape uses on the path portion of the request.
+type requestShaper struct {
+ pr *urlshaper.Parser
+}
+
+// Nicked directly from github.com/honeycombio/honeytail/leash.go
+// requestShape expects the field passed in to have the form
+// VERB /path/of/request HTTP/1.x
+// If it does, it will break it apart into components, normalize the URL,
+// and add a handful of additional fields based on what it finds.
+func (rs *requestShaper) Shape(field string, m map[string]interface{}) {
+ if val, ok := m[field]; ok {
+ // start by splitting out method, uri, and version
+ parts := strings.Split(val.(string), " ")
+ var path string
+ if len(parts) == 3 {
+ // treat it as METHOD /path HTTP/1.X
+ m[field+"_method"] = parts[0]
+ m[field+"_protocol_version"] = parts[2]
+ path = parts[1]
+ } else {
+ // treat it as just the /path
+ path = parts[0]
+ }
+
+ // next up, get all the goodies out of the path
+ res, err := rs.pr.Parse(path)
+ if err != nil {
+ // couldn't parse it, just pass along the event
+ logrus.WithError(err).Error("Couldn't parse request")
+ return
+ }
+ m[field+"_uri"] = res.URI
+ m[field+"_path"] = res.Path
+ if res.Query != "" {
+ m[field+"_query"] = res.Query
+ }
+ m[field+"_shape"] = res.Shape
+ if res.QueryShape != "" {
+ m[field+"_queryshape"] = res.QueryShape
+ }
+ for k, v := range res.PathFields {
+ m[field+"_path_"+k] = v[0]
+ }
+ }
+}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment