Created
May 17, 2015 10:10
-
-
Save lgoldstein/898b1a92a9e435c29116 to your computer and use it in GitHub Desktop.
Logstash configuration causing 1.5.0 failure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# version=1.0.12
# Input stage: for this repro only stdin is active; the syslog/tcp/udp
# inputs below are the production alternatives, kept for reference.
# All inputs tag events "syslog-raw" so the first filter picks them up.
input {
    stdin {
        tags => [ "syslog-raw" ]
    }
#    syslog {
#        tags => [ "syslog-raw" ]
#        use_labels => false
#        port => 5151
#    }
#    tcp {
#        codec => "plain"
#        data_timeout => -1
#        host => "0.0.0.0"
#        mode => "server"
#        port => 5151
#        tags => [ "syslog-raw" ]
#    }
#    udp {
#        buffer_size => 8192
#        codec => "plain"
#        host => "0.0.0.0"
#        port => 5151
#        queue_size => 2000
#        tags => [ "syslog-raw" ]
#        workers => 4
#    }
}
# 1st take care of any raw syslog messages: parse the RFC5424 envelope,
# extract tenant/enterprise + structured-data params, and strip the
# envelope fields before handing off to the per-flavor filters.
filter {
    if ("syslog-raw" in [tags]) {
        # From RFC5424: The syslog message has the following ABNF definition:
        #
        # SYSLOG-MSG = HEADER SP STRUCTURED-DATA [SP MSG]
        # HEADER = PRI VERSION SP TIMESTAMP SP HOSTNAME SP APP-NAME SP PROCID SP MSGID
        # PRI = "<" PRIVAL ">"
        # PRIVAL = 1*3DIGIT ; range 0 .. 191
        # VERSION = NONZERO-DIGIT 0*2DIGIT
        # HOSTNAME = NILVALUE / 1*255PRINTUSASCII
        # APP-NAME = NILVALUE / 1*48PRINTUSASCII
        # PROCID = NILVALUE / 1*128PRINTUSASCII
        # MSGID = NILVALUE / 1*32PRINTUSASCII
        # TIMESTAMP = NILVALUE / FULL-DATE "T" FULL-TIME
        # FULL-DATE = DATE-FULLYEAR "-" DATE-MONTH "-" DATE-MDAY
        # DATE-FULLYEAR = 4DIGIT
        # DATE-MONTH = 2DIGIT ; 01-12
        # DATE-MDAY = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on ; month/year
        # FULL-TIME = PARTIAL-TIME TIME-OFFSET
        # PARTIAL-TIME = TIME-HOUR ":" TIME-MINUTE ":" TIME-SECOND [TIME-SECFRAC]
        # TIME-HOUR = 2DIGIT ; 00-23
        # TIME-MINUTE = 2DIGIT ; 00-59
        # TIME-SECOND = 2DIGIT ; 00-59
        # TIME-SECFRAC = "." 1*6DIGIT
        # TIME-OFFSET = "Z" / TIME-NUMOFFSET
        # TIME-NUMOFFSET = ("+" / "-") TIME-HOUR ":" TIME-MINUTE
        # STRUCTURED-DATA = NILVALUE / 1*SD-ELEMENT
        # SD-ELEMENT = "[" SD-ID *(SP SD-PARAM) "]"
        # SD-PARAM = PARAM-NAME "=" %d34 PARAM-VALUE %d34
        # SD-ID = SD-NAME
        # PARAM-NAME = SD-NAME
        # PARAM-VALUE = UTF-8-STRING ; characters '"', '\' and ; ']' MUST be escaped.
        # SD-NAME = 1*32PRINTUSASCII ; except '=', SP, ']', %d34 (")
        # MSG = MSG-ANY / MSG-UTF8
        # MSG-ANY = *OCTET ; not starting with BOM
        # MSG-UTF8 = BOM UTF-8-STRING
        # BOM = %xEF.BB.BF
        # UTF-8-STRING = *OCTET ; UTF-8 string as specified in RFC 3629
        # OCTET = %d00-255
        # SP = %d32
        # PRINTUSASCII = %d33-126
        # NONZERO-DIGIT = %d49-57
        # DIGIT = %d48 / NONZERO-DIGIT
        # NILVALUE = "-"
        grok {
            # see <logstash-install-dir>/patterns/linux-syslog
            match => { "message" => "%{SYSLOG5424LINE}" }
            add_tag => [ "syslog5424-compliant" ]
        }
        date {
            # season to taste for your own syslog format(s)
            match => [ "syslog5424_ts",
                       "MMM d HH:mm:ss",
                       "MMM dd HH:mm:ss",
                       "ISO8601"
                     ]
            add_tag => [ "syslog5424-timestamp" ]
        }
        grok {
            match => { "syslog5424_sd" => "%{USERNAME:tenant}@%{POSINT:enterprise} +%{GREEDYDATA:syslog5424_sd_elements}]" }
            add_tag => [ "syslog5424-tenant-and-enterprise" ]
        }
        # TODO need an improved pattern to allow for extra elements
        # NOTE: we use separate grokkers in order not to depend on order of elements
        grok {
            match => { "syslog5424_sd_elements" => "access=\"%{DATA:access}\"" }
            add_tag => [ "syslog5424-access" ]
        }
        grok {
            match => { "syslog5424_sd_elements" => "origin=\"%{PROG:objecttype}\"" }
            add_tag => [ "syslog5424-origin" ]
        }
        grok {
            match => { "syslog5424_sd_elements" => "agentid=\"%{DATA:agentid}\"" }
            add_tag => [ "syslog5424-agentid" ]
        }
        # Proceed only if every stage above succeeded (each one tags on success)
        if ("syslog5424-compliant" in [tags])
           and ("syslog5424-timestamp" in [tags])
           and ("syslog5424-tenant-and-enterprise" in [tags])
           and ("syslog5424-origin" in [tags])
           and ("syslog5424-agentid" in [tags])
           and ("syslog5424-access" in [tags]) {
            # Uncomment if want to carry it over
            #mutate {
            #    # Normalize values of some known fields
            #    convert => [
            #        "syslog5424_pri", "integer",
            #        "syslog5424_ver", "integer"
            #    ]
            #}
            mutate {
                # replace the field data since we parsed the message header
                replace => [ "message", "%{syslog5424_msg}" ]
                # remove the fields we don't carry over to the next filter
                # NOTE: "syslog5424_app" is deliberately NOT listed here - it is
                # renamed to "application" below; listing the same field in both
                # remove_field and rename is conflicting (remove_field is a common
                # option applied after rename, so the removal was a no-op at best)
                remove_field => [
                    "syslog5424_pri",
                    "syslog5424_ver",
                    "syslog5424_ts",
                    "syslog5424_sd",
                    "syslog5424_msg",
                    "syslog5424_sd_elements",
                    "syslog5424_proc",
                    "priority",
                    "severity",
                    "facility",
                    "facility_label",
                    "severity_label"
                ]
                # canonicalize some fields
                rename => [
                    "syslog5424_app", "application",
                    "syslog5424_host", "agenthost"
                ]
                # update the tags to include the origin in order to trigger the next filter in chain
                add_tag => [ "%{objecttype}" ]
                # mark the fact that it is no longer a raw syslog line + remove the tags we used to detect OK parsing
                remove_tag => [ "syslog-raw",
                                "syslog5424-compliant",
                                "syslog5424-timestamp",
                                "syslog5424-tenant-and-enterprise",
                                "syslog5424-access",
                                "syslog5424-origin",
                                "syslog5424-agentid",
                                "_grokparsefailure"
                              ]
            }
        } else {
            mutate {
                add_tag => [ "syslog5424-parse-error" ]
            }
        }
    }
}
# Now use conditional per-flavor filters: parse nginx/apache combined-log
# events (routed here via the "%{objecttype}" tag added by the syslog filter).
filter {
    if ("nginx-access" in [tags]) or ("apache2-access" in [tags]) {
        grok {
            match => { "message" => "%{COMBINEDAPACHELOG}" }
            add_tag => [ "http-access-combined-log" ]
        }
        date {
            match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
            add_tag => [ "http-access-timestamp" ]
        }
        if ("http-access-combined-log" in [tags])
           and ("http-access-timestamp" in [tags]) {
            # strip quotes
            grok {
                match => { "agent" => "^\"%{GREEDYDATA:useragent}\"$" }
            }
            mutate {
                # Add some fields we need
                add_field => [
                    "protocol", "HTTP"
                ]
                # Remove fields we don't need
                remove_field => [
                    "ident",
                    "agent",
                    "referrer",
                    "host",
                    "timestamp"
                ]
                # NOTE: the old 'gsub => [ "agent", "^\"", "" ]' was removed -
                # the grok above already strips both quotes into [useragent],
                # and [agent] is removed by remove_field in this same mutate,
                # so the gsub (which only stripped the leading quote) was dead code
                # Canonicalize some existing fields - TODO add a 'timestamp' field with milliseconds value
                rename => [
                    "clientip", "remoteaddr",
                    "verb", "method",
                    "request", "uri",
                    "auth", "remoteuser",
                    "httpversion", "version",
                    "bytes", "responsesize",
                    "response", "statuscode"
                ]
                convert => [
                    "responsesize", "integer",
                    "statuscode", "integer"
                ]
                uppercase => [ "method" ]
                # remove helper tags
                remove_tag => [ "%{objecttype}", "http-access-combined-log", "http-access-timestamp" ]
                # mark the message as complete
                add_tag => [ "http-access-ready" ]
            }
        } else {
            mutate {
                add_tag => [ "http-access-parse-error" ]
            }
        }
    }
}
# see https://docs.aws.amazon.com/ElasticLoadBalancing/latest/DeveloperGuide/access-log-collection.html
# Parse AWS ELB access-log lines and normalize them into the same
# "http-access-ready" shape as the nginx/apache filter above.
filter {
    if ("aws-lb" in [tags]) {
        grok {
            match => { "message" => "^%{TIMESTAMP_ISO8601:timestamp} %{HOSTNAME:lb-name} %{IPORHOST:remoteaddr}:%{POSINT} %{IPORHOST:correlation}:%{POSINT} %{BASE10NUM:req-time} %{BASE10NUM:backend-time} %{BASE10NUM:rsp-time} %{NUMBER:lb-code} %{NUMBER:statuscode} %{NUMBER:requestsize} %{NUMBER:responsesize} \"%{WORD:method} %{URIPROTO:protocol}://(?:%{USER}(?::[^@]*)?@)?(?:%{IPORHOST:signature}(?::%{POSINT})?)?(?:%{NOTSPACE:uri}) HTTP/%{NUMBER:version}\"%{SPACE}$" }
            add_tag => [ "aws-lb-entry" ]
        }
        date {
            match => [ "timestamp", "ISO8601" ]
            add_tag => [ "aws-lb-access-timestamp" ]
            remove_field => [ "timestamp" ]
        }
        if ("aws-lb-entry" in [tags])
           and ("aws-lb-access-timestamp" in [tags]) {
            mutate {
                convert => [
                    "requestsize", "integer",
                    "responsesize", "integer",
                    "statuscode", "integer",
                    "lb-code", "integer",
                    "req-time", "float",
                    "backend-time", "float",
                    "rsp-time", "float"
                ]
                uppercase => [ "method", "protocol" ]
                # The actual agent host is not the syslog agent but rather the LB itself
                update => [ "agenthost", "%{lb-name}" ]
            }
            # Total request latency: sum of the three ELB timing segments
            # (Logstash 1.x event API - event['field'] access)
            ruby {
                code => "event['nanoseconds'] = ((event['req-time'] + event['backend-time'] + event['rsp-time']) * 1000000000).ceil;
                         event['duration'] = (event['nanoseconds'] / 1000000).ceil;"
            }
            mutate {
                # remove no longer needed fields
                remove_field => [ "req-time", "backend-time", "rsp-time", "lb-code", "host", "lb-name" ]
                # remove helper tags
                # NOTE: "timestamp" dropped from this list - it was a field (already
                # removed by the date filter's remove_field above), never a tag
                remove_tag => [ "aws-lb", "aws-lb-entry", "aws-lb-access-timestamp" ]
                # mark the message as complete
                add_tag => [ "http-access-ready" ]
            }
        } else {
            mutate {
                add_tag => [ "aws-lb-access-parse-error" ]
            }
        }
    }
}
# deconstruct the URI into path, filename, filetype and query
filter {
    if ("http-access-ready" in [tags]) and ([uri] =~ /.+/) {
        grok {
            match => { "uri" => "^%{GREEDYDATA:path}\?%{GREEDYDATA:query}$" }
            add_tag => [ "uri-path-and-query" ]
        }
        # if don't have both then assume no query
        if ("uri-path-and-query" not in [tags]) {
            mutate {
                add_field => [
                    "path", "%{uri}"
                ]
                remove_tag => [ "_grokparsefailure" ]
            }
        } else {
            mutate {
                remove_tag => [ "uri-path-and-query" ]
            }
        }
        # Split the path into directory part, file name and extension.
        # NOTE: the redundant 'remove_tag => [ "uri-path-and-query" ]' was removed
        # here - both branches of the conditional above guarantee that tag is
        # already gone by this point
        grok {
            match => { "path" => "^%{GREEDYDATA:pathPart}/%{GREEDYDATA:filename}\.(?<filetype>[a-zA-Z0-9]+)$" }
            add_tag => [ "file-name-and-type" ]
        }
        if ("file-name-and-type" in [tags]) {
            if [pathPart] {
                mutate {
                    replace => [ "path", "%{pathPart}" ]
                    remove_field => [ "pathPart" ]
                    remove_tag => [ "file-name-and-type" ]
                }
            } else {
                # file lives at the root - empty directory part means "/"
                mutate {
                    replace => [ "path", "/" ]
                    remove_tag => [ "file-name-and-type" ]
                }
            }
        } else {
            mutate {
                remove_tag => [ "_grokparsefailure" ]
            }
        }
    }
}
# resolve GeoIP information
# Looks up the client address and keeps only lat/lon, renamed to the
# [location][lat]/[location][lon] shape Elasticsearch expects for geo_point.
filter {
    if [remoteaddr] {
        geoip {
            source => "remoteaddr"
            target => "location"
            fields => [ "latitude", "longitude" ]
            add_tag => [ "geoip-resolved" ]
        }
    }
    if ("geoip-resolved" in [tags]) {
        mutate {
            # for some reason the 'geoip' filter adds this field even though we have not asked for it
            remove_field => [ "[location][location]" ]
            # normalize names to match Elasticsearch
            rename => [
                "[location][latitude]" , "[location][lat]",
                "[location][longitude]" , "[location][lon]"
            ]
            remove_tag => [ "geoip-resolved" ]
        }
    }
}
# if all ready then remove all non-essential fields and create canonical form
filter {
    if ("http-access-ready" in [tags]) {
        mutate {
            add_field => [
                "origin", "accesslog",
                "flavor", "http"
            ]
            remove_field => [
                "@version",
                "enterprise",
                "access"
            ]
            rename => [
                "message", "label"
            ]
            # sometimes an OK match can leave this as a leftover
            remove_tag => [ "_grokparsefailure" ]
            # NOTE(review): the commented-out ruby {} snippets below sit INSIDE
            # this mutate block - if re-enabled they must be moved out to the
            # enclosing filter level, since 'ruby' is a filter, not a mutate option
            #ruby {
            #    code => "event['timevaluemsec'] = (event['@timestamp'].to_f * 1000.0).to_i;"
            #}
            # TODO re-format '@timestamp' field to contain GMT offset instead of 'Z'
            #ruby {
            #    code => "event['@timestamp'] = event['@timestamp'].local('-08:00')"
            #}
        }
    }
}
# Output stage: fully-parsed access events go to stdout in rubydebug form
# (the elasticsearch_http output is the production target, kept commented);
# everything else is dumped as JSON lines for troubleshooting.
output {
    if ("http-access-ready" in [tags]) {
        stdout {
#            codec => json_lines {}
            codec => rubydebug {}
        }
#        elasticsearch_http {
#            host => "localhost"
#            # default, but bears re-listing just so we remember it
#            port => 9200
#            # default, but bears re-listing just so we remember it
#            flush_size => 100
#            idle_flush_time => 15
#            index => "%{tenant}"
#            index_type => "%{flavor}"
#            manage_template => false
#            template_name => "logstash"
#            # default, but bears re-listing just so we remember it
#            template_overwrite => false
#        }
#    } else if ("auth-access" in [tags]) {
#        file {
#            codec => "plain"
#            flush_interval => 5
#            gzip => false
#            message_format => "%{message}"
#            path => "/var/log/f2bauth.log"
#        }
    } else {
        stdout {
#            codec => rubydebug {}
            codec => json_lines {}
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment