input {
  # Receive syslog traffic and turn each message into a Logstash event.
  # The plugin contributes a priority field plus whatever the SYSLOGLINE
  # pattern captures (timestamp, logsource, program, pid, ...); the remainder
  # of the syslog line is left in the message field.
  syslog {
    # Every option below is left commented out, so the plugin defaults apply.
    # They are listed here only as a reference for what is in effect:
    #
    #   port         => 514
    #   codec        => plain
    #   syslog_field => "message"
    #   grok_pattern => "<%{POSINT:priority}>%{SYSLOGLINE}"
    #
    # Pattern references for the fields parsed by SYSLOGLINE:
    #   https://github.com/logstash-plugins/logstash-patterns-core/blob/0c9b956/patterns/linux-syslog#L9
    #   https://github.com/elastic/logstash/blob/v1.4.2/patterns/grok-patterns
  }
}
filter {
  # Everything below applies only to events whose type field is "heroku".
  # NOTE(review): nothing in this section sets [type]; presumably it is
  # assigned elsewhere (another input or config file) - confirm.
  if [type] == "heroku" {
    # Split multiline message fields into separate events.
    split {
      # field => "message"
    }

    # Further parse the syslog parameters of this specific flavor of syslog
    # out of the message field, and place the rest of the syslog string (the
    # encapsulated "payload") in the logplex_message field.
    grok {
      match => {
        "message" => "%{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:timestamp}|-) +(?:%{HOSTNAME:logplex_host}|-) +(?:%{WORD:logplex_source}|-) +(?:%{DATA:logplex_dyno}|-) +(-) +%{GREEDYDATA:logplex_message}"
      }
      # Drop the raw message only once it has been parsed successfully.
      # remove_field on a filter is applied only when that filter succeeds, so
      # an event tagged _grokparsefailure keeps its original message instead
      # of being silently emptied.
      remove_field => [ "message" ]
    }

    # Parse heroku key=value fields out of the encapsulated payload
    # (logplex_message). Only the listed keys are kept, each stored with a
    # "msg_" prefix.
    kv {
      source => "logplex_message"
      prefix => "msg_"
      include_keys => [ "at", "method", "path", "host", "request_id", "fwd", "dyno", "connect", "service", "status", "bytes", "source", "id", "wait", "timeout", "state" ]
    }

    # Parse application JSON fields out of the encapsulated payload
    # (logplex_message).
    # @note even more ideal than skip_on_invalid_json would be to not skip,
    # but to run this filter only when JSON is expected (e.g. a log event
    # coming from the application, where a JSON payload is expected), with
    # something like `if [source] == "puma"`.
    json {
      skip_on_invalid_json => true
      source => "logplex_message"
      add_tag => [ "json" ]
    }

    # Set log facility and severity fields from the priority captured by grok.
    syslog_pri { syslog_pri_field_name => "syslog5424_pri" }

    # Set the event date and @timestamp field from the syslog timestamp.
    # @note: would prefer to use the payload/message timestamp and fall back
    # to the syslog timestamp if there isn't one.
    date {
      match => [ "timestamp", "ISO8601" ]
      target => "@timestamp"
      # The timestamp field duplicates @timestamp only after a successful
      # parse; on failure it is kept so the unparsed value can be inspected.
      remove_field => [ "timestamp" ]
    }
  }
}