Skip to content

Instantly share code, notes, and snippets.

@adaam
Last active October 13, 2021 04:46
Show Gist options
  • Save adaam/a0c4fb61e48431a9ea0c7d32b0270b6f to your computer and use it in GitHub Desktop.
Save adaam/a0c4fb61e48431a9ea0c7d32b0270b6f to your computer and use it in GitHub Desktop.
NGINX ingest pipelines for Elasticsearch
{
  "description": "Pipeline for parsing Nginx access logs. Requires the user_agent plugins.",
  "processors": [
    {
      "grok": {
        "field": "message",
        "patterns": [
          "\"?%{IP_LIST:nginx.access.remote_ip_list} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{GREEDYDATA:nginx.access.info}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\" %{DATA:nginx.access.CF_RAY} %{NUMBER:nginx.access.request_time}"
        ],
        "pattern_definitions": {
          "IP_LIST": "%{IP}(\"?,?\\s*%{IP})*"
        },
        "ignore_missing": true
      }
    },
    {
      "grok": {
        "field": "nginx.access.info",
        "patterns": [
          "%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}",
          ""
        ],
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "nginx.access.info"
      }
    },
    {
      "split": {
        "field": "nginx.access.remote_ip_list",
        "separator": "\"?,?\\s+"
      }
    },
    {
      "script": {
        "lang": "painless",
        "inline": "boolean isPrivate(def ip) { try { StringTokenizer tok = new StringTokenizer(ip, '.'); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(item)) { ctx.nginx.access.remote_ip = item; found = true; break; } } if (!found) { ctx.nginx.access.remote_ip = ctx.nginx.access.remote_ip_list[0]; }"
      }
    },
    {
      "remove": {
        "field": "message"
      }
    },
    {
      "rename": {
        "field": "@timestamp",
        "target_field": "read_timestamp"
      }
    },
    {
      "date": {
        "field": "nginx.access.time",
        "target_field": "@timestamp",
        "formats": ["dd/MMM/yyyy:H:m:s Z"]
      }
    },
    {
      "remove": {
        "field": "nginx.access.time"
      }
    },
    {
      "user_agent": {
        "field": "nginx.access.agent",
        "target_field": "nginx.access.user_agent"
      }
    },
    {
      "convert": {
        "field": "nginx.access.request_time",
        "type": "float"
      }
    },
    {
      "rename": {
        "field": "nginx.access.agent",
        "target_field": "nginx.access.user_agent.original"
      }
    }
  ],
  "on_failure": [
    {
      "set": {
        "field": "error.message",
        "value": "{{ _ingest.on_failure_message }}"
      }
    }
  ]
}
{
  "description": "Pipeline for parsing the Nginx error logs",
  "processors": [
    {
      "grok": {
        "field": "message",
        "patterns": [
          "%{DATA:nginx.error.time} \\[%{DATA:nginx.error.level}\\] %{NUMBER:nginx.error.pid}#%{NUMBER:nginx.error.tid}: (\\*%{NUMBER:nginx.error.connection_id} )?%{GREEDYDATA:nginx.error.message}, client: %{IP:nginx.error.client}, server: %{GREEDYDATA:nginx.error.server}, request: \"(?:%{WORD:nginx.error.verb} %{NOTSPACE:nginx.error.request}(?: HTTP/%{NUMBER:nginx.error.httpversion}))\", host: \"%{GREEDYDATA:nginx.error.host}\"(, referrer: \"%{GREEDYDATA:nginx.error.referrer}\")?"
        ],
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "message"
      }
    },
    {
      "rename": {
        "field": "@timestamp",
        "target_field": "read_timestamp"
      }
    },
    {
      "date": {
        "field": "nginx.error.time",
        "target_field": "@timestamp",
        "formats": ["yyyy/MM/dd H:m:s"]
      }
    },
    {
      "remove": {
        "field": "nginx.error.time"
      }
    }
  ],
  "on_failure": [
    {
      "set": {
        "field": "error.message",
        "value": "{{ _ingest.on_failure_message }}"
      }
    }
  ]
}
{
  "description": "NGINX grok for custom log",
  "processors": [
    {
      "grok": {
        "field": "message",
        "patterns": ["%{IPORHOST:clientip} (?:-|(%{WORD}.%{WORD})) %{USER:ident} \\[%{HTTPDATE:timestamp}\\] \"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{QS:referrer} %{QS:agent} %{DATA:CF_RAY} %{NUMBER:request_time}"],
        "trace_match": true
      }
    },
    {
      "date": {
        "field": "timestamp",
        "formats": ["dd/MMM/yyyy:HH:mm:ss Z"]
      }
    },
    {
      "remove": {
        "field": ["message", "timestamp"]
      }
    },
    {
      "convert": {
        "field": "httpversion",
        "type": "float",
        "ignore_missing": true
      }
    },
    {
      "convert": {
        "field": "response",
        "type": "integer"
      }
    },
    {
      "convert": {
        "field": "bytes",
        "type": "integer",
        "ignore_missing": true
      }
    },
    {
      "convert": {
        "field": "request_time",
        "type": "float"
      }
    }
  ]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment