Skip to content

Instantly share code, notes, and snippets.

@steveash
Last active May 6, 2022 03:41
Show Gist options
  • Save steveash/82f5f3bd10b1a6eb7bcc to your computer and use it in GitHub Desktop.
Save steveash/82f5f3bd10b1a6eb7bcc to your computer and use it in GitHub Desktop.
ELK configuration for aggregating Cassandra and Spark logs
# Accept events shipped by logstash-forwarder through the lumberjack input.
input {
  lumberjack {
    # Port this input listens on
    port => 5043

    # SSL certificate and private key used by this listener
    ssl_certificate => "/etc/pki/tls/certs/logstash-forwarder/logstash-forwarder.crt"
    ssl_key => "/etc/pki/tls/private/logstash-forwarder/logstash-forwarder.key"

    # Fallback event type only: logstash-forwarder normally sets the type itself
    type => "clusterlogs"
  }
}
# File: 01_cassandra_filter.conf
# (name recovered from stray text that had been fused onto the closing brace
# of this block, where it was not valid configuration syntax)
#
# Parses log lines of the form
#   LEVEL [thread] yyyy-MM-dd HH:mm:ss,SSS [worker] some.java.Class:123 - text
# for events typed "cassandra" or "spark-worker".
filter {
  if [type] in ["cassandra","spark-worker"] {
    grok {
      # Two accepted layouts; the second has an extra "worker" token between
      # the timestamp and the source class.
      match => {
        "message" => [
          "%{WORD:level}\s+\[%{DATA:thread}\]\s+(?<logtime>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+%{JAVACLASS:srcclass}:\d+ - %{GREEDYDATA:data}",
          "%{WORD:level}\s+\[%{DATA:thread}\]\s+(?<logtime>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+%{DATA:worker}\s+%{JAVACLASS:srcclass}:\d+ - %{GREEDYDATA:data}"
        ]
      }
      # Copy the event's current @timestamp into received_at
      add_field => [ "received_at", "%{@timestamp}" ]
    }
    date {
      # Parse the captured logtime field (comma-separated milliseconds)
      match => [ "logtime", "YYYY-MM-dd HH:mm:ss,SSS" ]
    }
  }
}
# Parses Spark application, driver and worker log lines into
# level / logtime / srcclass / data fields.
filter {
  if [type] in ["spark-app","spark-driver", "spark-worker"] {
    grok {
      # Accepted layouts, tried in order:
      #   1. LEVEL yyyy-MM-dd HH:mm:ss some.java.Class: text
      #   2. LEVEL yyyy-MM-dd HH:mm:ss some.java.Class[:line] - text
      #   3. LEVEL HH:mm:ss source text
      match => {
        "message" => [
          "\s*%{WORD:level}\s+(?<logtime>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+%{JAVACLASS:srcclass}:\s+%{GREEDYDATA:data}",
          "\s*%{WORD:level}\s+(?<logtime>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+%{JAVACLASS:srcclass}(?::\d+)? -\s+%{GREEDYDATA:data}",
          "\s*%{WORD:level}\s+(?<logtime>\d{2}:\d{2}:\d{2})\s+%{DATA:srcclass}\s+%{GREEDYDATA:data}"
        ]
      }
      # Copy the event's current @timestamp into received_at
      add_field => [ "received_at", "%{@timestamp}" ]
    }
    date {
      # NOTE(review): the time-only format carries no date; confirm which
      # date the resulting timestamp receives for layout 3 lines.
      match => [ "logtime", "YYYY-MM-dd HH:mm:ss", "HH:mm:ss" ]
    }
  }
}
# Fold Java stack traces into a single event by appending every line that
# looks like part of a trace to the event before it.
filter {
  multiline {
    # No "type" is set (formerly: type => "all"), so this applies to all inputs.
    #
    # A line is treated as a continuation when it is one of:
    #   - an exception header    "some.pkg.FooException: message"
    #   - a stack frame          "    at some.pkg.Class.method(...)"
    #   - an elision marker      "    ... 12 more"
    #   - a cause header         "Caused by: ..."
    # The dots of the elision marker are escaped so that only a literal "..."
    # matches, rather than any three characters.
    pattern => "(^[a-zA-Z.]+(?:Error|Exception): .+)|(^\s+at .+)|(^\s+\.\.\. \d+ more)|(^\s*Caused by:.+)"
    what => "previous"
  }
}
# Send every processed event to Elasticsearch on this machine over HTTP.
output {
  elasticsearch {
    protocol => "http"
    host => "localhost"
  }
}
# logstash-forwarder configuration: where to ship log lines and which files
# to watch. Each "files" entry tags its events with a "type" field.
{
  "network": {
    # Destination host:port; 5043 matches the port of the lumberjack input
    "servers": [ "<elasticsearch-server-hostname>:5043" ],

    # Optional client SSL certificate
    #"ssl certificate": "./logstash-forwarder.crt",

    # Optional client SSL key
    #"ssl key": "./logstash-forwarder.key",

    # Trusted SSL CA file, used to authenticate the downstream server
    "ssl ca": "/etc/pki/tls/certs/logstash-forwarder/logstash-forwarder.crt",

    "timeout": 15
  },

  "files": [
    # Cassandra logs
    {
      "paths": [
        "/var/log/cassandra/output.log",
        "/var/log/cassandra/system.log"
      ],
      "fields": { "type": "cassandra" }
    },

    # Logs under /var/log/spark, tagged as spark-worker
    {
      "paths": [
        "/var/log/spark/*/*.log"
      ],
      "dead time": "1h",
      "fields": { "type": "spark-worker" }
    },

    # Per-application logs and stdout/stderr under the Spark worker directory
    {
      "paths": [
        "/var/lib/spark/worker/worker-*/app-*/*/*.log",
        "/var/lib/spark/worker/worker-*/app-*/*/stdout",
        "/var/lib/spark/worker/worker-*/app-*/*/stderr"
      ],
      "dead time": "1h",
      "fields": { "type": "spark-app" }
    },

    # Per-driver logs and stdout/stderr under the Spark worker directory
    {
      "paths": [
        "/var/lib/spark/worker/worker-*/driver-*/*.log",
        "/var/lib/spark/worker/worker-*/driver-*/stdout",
        "/var/lib/spark/worker/worker-*/driver-*/stderr"
      ],
      "dead time": "1h",
      "fields": { "type": "spark-driver" }
    }
  ]
}
# systemd unit that runs logstash-forwarder as a service.
[Service]
# Start the forwarder with the configuration at /etc/logstash-forwarder.conf
ExecStart=/opt/logstash-forwarder/bin/logstash-forwarder -config /etc/logstash-forwarder.conf
WorkingDirectory=/var/lib/logstash-forwarder
# Restart the process whenever it exits
Restart=always
# Route stdout and stderr to syslog, tagged with the identifier below
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=logstash-forwarder
# Runs as root
User=root
Group=root
# Empty assignment: no extra environment variables are set here
Environment=
[Install]
# Enable as part of multi-user.target
WantedBy=multi-user.target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment