-
-
Save ajbouh/462b1126e28f2755aeaf085d9b61ed17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go | |
index 58c05607..2a5ce2a7 100644 | |
--- a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go | |
+++ b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/main.go | |
@@ -3,10 +3,12 @@ package main | |
import ( | |
"bufio" | |
"compress/gzip" | |
+ "encoding/json" | |
"io" | |
"os" | |
"strconv" | |
"strings" | |
+ "time" | |
"github.com/aws/aws-lambda-go/events" | |
"github.com/aws/aws-lambda-go/lambda" | |
@@ -17,6 +19,7 @@ import ( | |
"github.com/honeycombio/honeytail/httime" | |
"github.com/honeycombio/honeytail/parsers" | |
"github.com/honeycombio/libhoney-go" | |
+ "github.com/honeycombio/urlshaper" | |
"github.com/sirupsen/logrus" | |
) | |
@@ -28,9 +31,105 @@ type Response struct { | |
var parser parsers.LineParser | |
var parserType, timeFieldName, timeFieldFormat, env string | |
+var durationFieldName, durationEndFieldName, durationEndFieldFormat, parsedFieldNamePrefix string | |
+var requestFieldName string | |
+var fieldNameAliases map[string]string | |
+var amznTraceIdFieldName string | |
var matchPatterns, filterPatterns []string | |
var bufferSize uint | |
var forceGunzip bool | |
+var shaper requestShaper | |
+ | |
+func parseLine(line string) (map[string]interface{}, error) { | |
+ parsedLine, err := parser.ParseLine(line) | |
+ | |
+ if parsedFieldNamePrefix == "" || err != nil { | |
+ return parsedLine, err | |
+ } | |
+ | |
+ m := make(map[string]interface{}, len(parsedLine)) | |
+ for k, v := range parsedLine { | |
+ m[parsedFieldNamePrefix+k] = v | |
+ } | |
+ | |
+ return m, nil | |
+} | |
+ | |
+func processDuration(m map[string]interface{}, timestamp time.Time, durationEndFieldName, durationEndFieldFormat string) { | |
+ var durationMs float64 | |
+ | |
+ if tm, foundDurationEndField := httime.FindTimestamp(m, durationEndFieldName, durationEndFieldFormat); foundDurationEndField != "" { | |
+ duration := tm.Sub(timestamp) | |
+ if duration > 0 { | |
+ durationMs = float64(duration / time.Millisecond) | |
+ } | |
+ } | |
+ if durationMs > 0.0 { | |
+ m["duration_ms"] = durationMs | |
+ } | |
+} | |
+ | |
// parseAmznTraceIdValue extracts the identifier from one value of an
// X-Amzn-Trace-Id field. The value must consist of three "-"-separated parts
// whose first part is the version "1"; everything after the second "-" is
// returned as the identifier.
//
// The boolean result is false when the value has fewer than three parts, when
// the version is not "1", or when the identifier is empty.
func parseAmznTraceIdValue(value string) (string, bool) {
	parts := strings.SplitN(value, "-", 3)
	if len(parts) != 3 || parts[0] != "1" {
		return "", false
	}

	id := parts[2]
	return id, id != ""
}
+ | |
+// Based on https://github.com/honeycombio/honeyaws/blob/1a182a1176d7741c19b2d560dc8614ee7763612e/publisher/publisher.go#L200-L215 | |
+// parse the included X-Amzn-Trace-Id header if it is present in an ALB access | |
+// log - see | |
+// https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-request-tracing.html | |
+// for reference | |
+func processAmznTraceIdHeader(m map[string]interface{}, edgeMode bool, amznTraceFieldName string) { | |
+ amznTraceID, ok := m[amznTraceFieldName].(string) | |
+ if !ok { | |
+ return | |
+ } | |
+ fields := strings.Split(amznTraceID, ";") | |
+ rootSpan := true | |
+ for _, field := range fields { | |
+ kv := strings.Split(field, "=") | |
+ // something we don't expect | |
+ if len(kv) != 2 { | |
+ continue | |
+ } | |
+ key := kv[0] | |
+ val := kv[1] | |
+ switch key { | |
+ case "Root": | |
+ if id, ok := parseAmznTraceIdValue(val); ok { | |
+ m["trace.trace_id"] = id | |
+ } | |
+ case "Self": | |
+ if id, ok := parseAmznTraceIdValue(val); ok { | |
+ m["trace.span_id"] = id | |
+ } | |
+ rootSpan = false | |
+ case "Parent": | |
+ // if we're running in "edge mode", ignore the parent id. We want | |
+ // the load balancer to always be the root of the trace. | |
+ if !edgeMode { | |
+ if id, ok := parseAmznTraceIdValue(val); ok { | |
+ m["trace.parent_id"] = id | |
+ } | |
+ rootSpan = false | |
+ } | |
+ case "Sampled": | |
+ m["sampled"] = val | |
+ default: | |
+ m[key] = val | |
+ } | |
+ } | |
+ if rootSpan { | |
+ m["trace.span_id"] = m["trace.trace_id"].(string) | |
+ } | |
+} | |
func Handler(request events.S3Event) (Response, error) { | |
sess := session.Must(session.NewSession(&aws.Config{ | |
@@ -68,7 +167,7 @@ func Handler(request events.S3Event) (Response, error) { | |
"key": record.S3.Object.Key, | |
}).Info("parser checkpoint") | |
} | |
- parsedLine, err := parser.ParseLine(scanner.Text()) | |
+ parsedLine, err := parseLine(scanner.Text()) | |
if err != nil { | |
logrus.WithError(err).WithField("line", scanner.Text()). | |
Warn("failed to parse line") | |
@@ -79,9 +178,26 @@ func Handler(request events.S3Event) (Response, error) { | |
} | |
hnyEvent := libhoney.NewEvent() | |
- timestamp := httime.GetTimestamp(parsedLine, timeFieldName, timeFieldFormat) | |
+ timestamp, _ := httime.FindTimestamp(parsedLine, timeFieldName, timeFieldFormat) | |
hnyEvent.Timestamp = timestamp | |
+ if amznTraceIdFieldName != "" { | |
+ processAmznTraceIdHeader(parsedLine, false, amznTraceIdFieldName) | |
+ } | |
+ if durationEndFieldName != "" { | |
+ processDuration(parsedLine, timestamp, durationEndFieldName, durationEndFieldFormat) | |
+ } | |
+ | |
+ if requestFieldName != "" { | |
+ shaper.Shape(requestFieldName, parsedLine) | |
+ } | |
+ | |
+ for newFieldName, existingFieldName := range fieldNameAliases { | |
+ if v, ok := parsedLine[existingFieldName]; ok { | |
+ parsedLine[newFieldName] = v | |
+ } | |
+ } | |
+ | |
// convert ints and floats if necessary | |
if parserType != "json" { | |
hnyEvent.Add(common.ConvertTypes(parsedLine)) | |
@@ -133,9 +249,29 @@ func main() { | |
common.AddUserAgentMetadata("s3", parserType) | |
env = os.Getenv("ENVIRONMENT") | |
+ | |
timeFieldName = os.Getenv("TIME_FIELD_NAME") | |
timeFieldFormat = os.Getenv("TIME_FIELD_FORMAT") | |
+ amznTraceIdFieldName = os.Getenv("AMZN_TRACE_ID_FIELD_NAME") | |
+ | |
+ durationEndFieldName = os.Getenv("DURATION_END_FIELD_NAME") | |
+ durationEndFieldFormat = os.Getenv("DURATION_END_FORMAT") | |
+ | |
+ fieldNameAliases = map[string]string{} | |
+ fieldNameAliasJson := os.Getenv("FIELD_NAME_ALIAS_JSON") | |
+ if fieldNameAliasJson != "" { | |
+ if err = json.Unmarshal([]byte(fieldNameAliasJson), &fieldNameAliases); err != nil { | |
+ logrus.WithError(err).WithField("FIELD_NAME_ALIAS_JSON", fieldNameAliasJson). | |
+ Fatal("unable to parse FIELD_NAME_ALIAS_JSON") | |
+ return | |
+ } | |
+ } | |
+ | |
+ parsedFieldNamePrefix = os.Getenv("PARSED_FIELD_NAME_PREFIX") | |
+ | |
+ requestFieldName = os.Getenv("REQUEST_FIELD_NAME") | |
+ | |
matchPatterns = []string{".*"} | |
filterPatterns = []string{} | |
bufferSize = 1024 * 64 | |
@@ -157,6 +293,8 @@ func main() { | |
forceGunzip = true | |
} | |
+ shaper = requestShaper{&urlshaper.Parser{}} | |
+ | |
lambda.Start(Handler) | |
} | |
diff --git a/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go | |
new file mode 100644 | |
index 00000000..b46cf70a | |
--- /dev/null | |
+++ b/third_party/com_github_honeycombio_agentless_integrations_for_aws/s3-handler/request_shaper.go | |
@@ -0,0 +1,54 @@ | |
+package main | |
+ | |
+import ( | |
+ "strings" | |
+ | |
+ "github.com/honeycombio/urlshaper" | |
+ "github.com/sirupsen/logrus" | |
+) | |
+ | |
+type requestShaper struct { | |
+ pr *urlshaper.Parser | |
+} | |
+ | |
+// Nicked directly from github.com/honeycombio/honeytail/leash.go | |
+// requestShape expects the field passed in to have the form | |
+// VERB /path/of/request HTTP/1.x | |
+// If it does, it will break it apart into components, normalize the URL, | |
+// and add a handful of additional fields based on what it finds. | |
+func (rs *requestShaper) Shape(field string, m map[string]interface{}) { | |
+ if val, ok := m[field]; ok { | |
+ // start by splitting out method, uri, and version | |
+ parts := strings.Split(val.(string), " ") | |
+ var path string | |
+ if len(parts) == 3 { | |
+ // treat it as METHOD /path HTTP/1.X | |
+ m[field+"_method"] = parts[0] | |
+ m[field+"_protocol_version"] = parts[2] | |
+ path = parts[1] | |
+ } else { | |
+ // treat it as just the /path | |
+ path = parts[0] | |
+ } | |
+ | |
+ // next up, get all the goodies out of the path | |
+ res, err := rs.pr.Parse(path) | |
+ if err != nil { | |
+ // couldn't parse it, just pass along the event | |
+ logrus.WithError(err).Error("Couldn't parse request") | |
+ return | |
+ } | |
+ m[field+"_uri"] = res.URI | |
+ m[field+"_path"] = res.Path | |
+ if res.Query != "" { | |
+ m[field+"_query"] = res.Query | |
+ } | |
+ m[field+"_shape"] = res.Shape | |
+ if res.QueryShape != "" { | |
+ m[field+"_queryshape"] = res.QueryShape | |
+ } | |
+ for k, v := range res.PathFields { | |
+ m[field+"_path_"+k] = v[0] | |
+ } | |
+ } | |
+} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment