Skip to content

Instantly share code, notes, and snippets.

@ansell
Forked from sebnmuller/solr-filter.conf
Created April 18, 2019 03:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ansell/7c59964cce75f6ad03cc71f2ed7520c4 to your computer and use it in GitHub Desktop.
Save ansell/7c59964cce75f6ad03cc71f2ed7520c4 to your computer and use it in GitHub Desktop.
Logstash config for ingesting Solr logs
# Source of events: a single archived Solr log file on local disk.
input {
  file {
    path => "/Users/sebastienmuller/comperio/projects/sintef/tmp/logs/indexing_logs_2/solr.log.3"
    # Start reading at the top of the file.
    start_position => "beginning"
    # The sincedb file is pointed at /dev/null.
    # NOTE(review): presumably so no read offset survives between runs and the
    # file is re-ingested every time - confirm this is intended.
    sincedb_path => "/dev/null"
  }
}
filter {
  # Pull the severity word and the timestamp off the head of each line.
  # tag_on_failure is emptied so non-matching lines receive no failure tag.
  grok {
    match => { "message" => "%{WORD:level}.+?- %{DATA:LogTime};" }
    tag_on_failure => []
  }

  # negate => true: every line that does NOT look like the start of a log
  # entry is folded into the previous event (stack traces, commit details).
  multiline {
    pattern => "%{WORD}.+?- %{DATA};"
    negate => true
    what => "previous"
    add_tag => []
  }

  # INFO events take a different parsing path than ERROR events.
  if "INFO" in [level] {
    # Request lines: capture collection, endpoint, raw params, action and docId.
    grok {
      match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+?\[%{WORD:collection}\].+?path=%{DATA:endpoint} params=\{%{DATA:params}\}.+?\{%{WORD:action}=\[%{DATA:docId}" }
      tag_on_failure => []
    }

    if [params] {
      # Break the captured params string into key/value fields, using "&"
      # as the separator between pairs.
      kv {
        field_split => "&"
        source => "params"
      }
    } else {
      # No params were captured: keep the event only if it is a commit
      # message (marked with action=commit); anything else is tagged "drop".
      grok {
        match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+?commits" }
        tag_on_failure => ["drop"]
        add_field => { "action" => "commit" }
      }

      if "drop" in [tags] {
        drop {}
      }
    }
  }

  # An ERROR event carries a stack trace, already merged into one message by
  # the multiline stage above.
  if "ERROR" in [level] {
    # Exception text of the top-level error and of each "Caused by:" entry.
    grok {
      match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+%{DATA:reason}(\n\t)((.+?Caused by: ((([a-zA-Z]+(\.|;|:))+) )+)%{DATA:reason}(\n\t))+" }
      tag_on_failure => []
    }

    # Messages of the form "... Error: <text>".
    grok {
      match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+?Error: %{GREEDYDATA:reason}" }
      tag_on_failure => []
    }
  }

  # Strip the "multiline" tag left behind by the multiline stage.
  mutate {
    remove_tag => ["multiline"]
  }

  # Discard events whose message is empty or whitespace only.
  if [message] =~ /^\s*$/ {
    drop {}
  }
}
output {
  # Ship events straight to the Elasticsearch instance on localhost.
  elasticsearch_http {
    host => "localhost"
    # Index name is "solr-" followed by a year.month.day date pattern.
    index => "solr-%{+YYYY.MM.dd}"
    # Index template file to install; template_overwrite is switched on.
    template => "/Users/sebastienmuller/dev/logstash/logstash-1.5.2/bin/template.json"
    template_overwrite => true
  }
}
# Source of events: the Solr log file.
input {
  file {
    # NOTE(review): the file input is documented as requiring absolute paths;
    # confirm that "~" is actually expanded here, otherwise substitute the
    # full path to solr.log.
    path => "~/solr.log"
  }
}
filter {
  # Pull the severity word and the timestamp off the head of each line.
  # tag_on_failure is emptied so non-matching lines receive no failure tag.
  grok {
    match => { "message" => "%{WORD:level}.+?- %{DATA:LogTime};" }
    tag_on_failure => []
  }

  # Lines that begin with a tab followed by "commit{" are appended to the
  # previous event, so one commit report becomes a single message.
  multiline {
    pattern => "^\t(commit\{)"
    what => "previous"
  }

  # INFO events take a different parsing path than ERROR events.
  if "INFO" in [level] {
    # Request lines: capture collection, endpoint, raw params, action and docId.
    grok {
      match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+?\[%{WORD:collection}\].+?path=%{DATA:endpoint} params=\{%{DATA:params}\}.+?\{%{WORD:action}=\[%{DATA:docId}" }
      tag_on_failure => []
    }

    if [params] {
      # Break the captured params string into key/value fields, using "&"
      # as the separator between pairs.
      kv {
        field_split => "&"
        source => "params"
      }
    } else {
      # No params were captured: keep the event only if it is a commit
      # message (marked with action=commit); anything else is tagged "drop".
      grok {
        match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+?commits" }
        tag_on_failure => ["drop"]
        add_field => { "action" => "commit" }
      }

      if "drop" in [tags] {
        drop {}
      }
    }
  }

  # An ERROR event implies a stack trace, which needs multiline parsing.
  # NOTE(review): [level] is only set when the first grok matches a line, so
  # verify that stack-trace continuation lines really reach the multiline
  # filters inside this conditional.
  if "ERROR" in [level] {
    # Lines starting with whitespace are joined to the previous event.
    multiline {
      pattern => "^\s"
      what => "previous"
      add_tag => ["multiline_pre"]
    }

    # "Caused by" lines are joined to the previous event as well.
    multiline {
      pattern => "^Caused by"
      what => "previous"
      add_tag => ["multiline_post"]
    }

    if "multiline_post" in [tags] {
      # Exception text of the top-level error and of each "Caused by:" entry.
      grok {
        match => { "message" => ".+?; ((([a-zA-Z]+(\.|;|:))+) )+%{DATA:reason}(\n\t)((.+?Caused by: ((([a-zA-Z]+(\.|;|:))+) )+)%{DATA:reason}(\n\t))+" }
        tag_on_failure => []
      }
    }
  }

  # Strip the helper tags added above and the "multiline" tag left behind by
  # the multiline stages.
  mutate {
    remove_tag => ["multiline_pre", "multiline_post", "multiline"]
  }

  # Discard events whose message is empty or whitespace only.
  if [message] =~ /^\s*$/ {
    drop {}
  }
}
output {
  # Ship events straight to the Elasticsearch instance on localhost.
  elasticsearch_http {
    host => "localhost"
    # Index name is "solr-" followed by a year.month.day date pattern.
    index => "solr-%{+YYYY.MM.dd}"
    # Index template file to install; template_overwrite is switched on.
    # NOTE(review): confirm "~" is expanded for this setting, otherwise use
    # the full path to the template file.
    template => "~/logstash/bin/logstash_solr_template.json"
    template_overwrite => true
  }
}
{
"template" : "solr-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true},
"dynamic_templates" : [ {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
}
}
}
} ],
"properties" : {
"@version": { "type": "string", "index": "not_analyzed" },
"LogTime" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss.SSS"
}
}
}
}
}
INFO - 2015-09-21 12:34:23.420; org.apache.solr.core.SolrDeletionPolicy; SolrDeletionPolicy.onCommit: commits: num=2
commit{dir=NRTCachingDirectory(MMapDirectory@D:\solution\solr\sintef_main\data\index lockFactory=NativeFSLockFactory@D:\solution\solr\sintef_main\data\index; maxCacheMB=48.0 maxMergeSizeMB=4.0),segFN=segments_1vj7,generation=87523}
commit{dir=NRTCachingDirectory(MMapDirectory@D:\solution\solr\sintef_main\data\index lockFactory=NativeFSLockFactory@D:\solution\solr\sintef_main\data\index; maxCacheMB=48.0 maxMergeSizeMB=4.0),segFN=segments_1vj8,generation=87524}
INFO - 2015-09-07 15:40:34.536; org.apache.solr.update.processor.LogUpdateProcessor; [sintef_main] webapp=/ path=/update/extract params={literal.source=epifile&literal.epi_file_title=GOFER+L4.0+Demonstratorer+V1.0.pdf&literal.title=GOFER+L4.0+Demonstratorer+V1.0.pdf&literal.id=epifile_211278&literal.epifileid_s=211278&literal.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/6060/prosjektfiler/gofer/gofer-l4.0-demonstratorer-v1.0.pdf&stream.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/6060/prosjektfiler/gofer/gofer-l4.0-demonstratorer-v1.0.pdf&literal.filesource_s=SiteFile} {} 0 65
INFO - 2015-09-07 15:40:07.578; org.apache.solr.update.processor.LogUpdateProcessor; [sintef_main] webapp=/ path=/update/extract params={literal.source=epifile&literal.epi_file_title=A05188_Ungdom+påvirker+ungdom.pdf&literal.title=A05188_Ungdom+påvirker+ungdom.pdf&literal.id=epifile_210994&literal.epifileid_s=210994&literal.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/5033/a05188_ungdom-pavirker-ungdom.pdf&stream.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/5033/a05188_ungdom-pavirker-ungdom.pdf&literal.filesource_s=SiteFile} {add=[epifile_210994 (1511661982236803072)]} 0 125
ERROR - 2015-09-07 15:40:34.537; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: org.apache.tika.exception.TikaException: Unable to extract PDF content
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:225)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:246)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1976)
at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:220)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:122)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:171)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:103)
at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:950)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:116)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:408)
at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1070)
at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:611)
at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:314)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Unknown Source)
Caused by: org.apache.tika.exception.TikaException: Unable to extract PDF content
at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:88)
at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:153)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:242)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:219)
... 23 more
Caused by: java.io.IOException: javax.crypto.BadPaddingException: Given final block not properly padded
at javax.crypto.CipherInputStream.getMoreData(CipherInputStream.java:115)
at javax.crypto.CipherInputStream.read(CipherInputStream.java:233)
at javax.crypto.CipherInputStream.read(CipherInputStream.java:209)
at org.apache.pdfbox.pdmodel.encryption.SecurityHandler.encryptData(SecurityHandler.java:313)
at org.apache.pdfbox.pdmodel.encryption.SecurityHandler.decryptStream(SecurityHandler.java:412)
at org.apache.pdfbox.pdmodel.encryption.SecurityHandler.decrypt(SecurityHandler.java:384)
at org.apache.pdfbox.pdmodel.encryption.SecurityHandler.decryptObject(SecurityHandler.java:358)
at org.apache.pdfbox.pdmodel.encryption.SecurityHandler.proceedDecryption(SecurityHandler.java:195)
at org.apache.pdfbox.pdmodel.encryption.StandardSecurityHandler.decryptDocument(StandardSecurityHandler.java:156)
at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1366)
at org.apache.pdfbox.pdmodel.PDDocument.decrypt(PDDocument.java:798)
at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:316)
at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:72)
... 28 more
Caused by: javax.crypto.BadPaddingException: Given final block not properly padded
at com.sun.crypto.provider.CipherCore.doFinal(CipherCore.java:966)
at com.sun.crypto.provider.CipherCore.doFinal(CipherCore.java:824)
at com.sun.crypto.provider.AESCipher.engineDoFinal(AESCipher.java:436)
at javax.crypto.Cipher.doFinal(Cipher.java:2048)
at javax.crypto.CipherInputStream.getMoreData(CipherInputStream.java:112)
... 40 more
INFO - 2015-09-07 15:40:34.535; org.apache.solr.update.processor.LogUpdateProcessor; [sintef_main] webapp=/ path=/update/extract params={literal.source=epifile&literal.epi_file_title=GOFER+L4.0+Demonstratorer+V1.0.pdf&literal.title=GOFER+L4.0+Demonstratorer+V1.0.pdf&literal.id=epifile_211278&literal.epifileid_s=211278&literal.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/6060/prosjektfiler/gofer/gofer-l4.0-demonstratorer-v1.0.pdf&stream.url=http://www.sintef.no/globalassets/upload/teknologi_samfunn/6060/prosjektfiler/gofer/gofer-l4.0-demonstratorer-v1.0.pdf&literal.filesource_s=SiteFile} {} 0 65
INFO - 2015-09-07 15:40:17.874; org.apache.solr.core.SolrCore; [sintef_main] Registered new searcher Searcher@5dc4452b[sintef_main] main{StandardDirectoryReader(segments_1jd0:315460:nrt _1f7c(4.10.4):C48803/20864:delGen=4917 _2eyv(4.10.4):C7505 _2e8y(4.10.4):C20758/1:delGen=1 _2f26(4.10.4):C978 _2fc7(4.10.4):C1385 _2f58(4.10.4):C2309 _2f1m(4.10.4):C131 _2f8u(4.10.4):C1093 _2ff9(4.10.4):C642 _2ffj(4.10.4):C28 _2ff2(4.10.4):C6 _2ff5(4.10.4):C5 _2ff6(4.10.4):C2 _2ffc(4.10.4):C4 _2ffe(4.10.4):C4 _2fff(4.10.4):C2 _2ffh(4.10.4):C1 _2ffk(4.10.4):C5 _2ffl(4.10.4):C1 _2ffm(4.10.4):C5 _2ffn(4.10.4):C3 _2ffo(4.10.4):C5 _2ffp(4.10.4):C2 _2ffq(4.10.4):C6 _2ffr(4.10.4):C2 _2ffs(4.10.4):C3)}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment