Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Logstash config for custom crawler logs
# Read every file in the crawler data directory and tag each line as a
# "crawlerlog" event so the filter section can select it.
input {
  file {
    # Forward slashes are required here even on Windows: the path is a glob
    # pattern, and a backslash is treated as an escape character, so the
    # original "C:\dev\data\*" would not expand the trailing wildcard.
    path => "C:/dev/data/*"
    type => "crawlerlog"
    # Only affects files Logstash has not seen before; files already tracked
    # in the sincedb resume from their recorded offset.
    start_position => "beginning"
  }
}
# Parse crawler log lines: split the pipe-delimited record, derive @timestamp
# from the leading date column, expand the key=value columns into fields,
# then rename/clean up the resulting fields.
filter {
  # Only touch events tagged by the crawler-log file input.
  if [type] == "crawlerlog" {

    # Each line is a pipe-delimited record with eight columns.
    csv {
      separator => "|"
      columns => ["log_timestamp", "request_hash", "bot_profile", "session_id", "profile_cnt", "ip", "url", "profile_id"]
    }

    # Break the timestamp column into weekday, "MMM dd HH:mm:ss" and year.
    # The literal " GMT " is matched but not captured; whatever trails the
    # year is captured as "millis" and discarded later.
    grok {
      match => {"log_timestamp" => "\A%{DAY:day}%{SPACE}%{SYSLOGTIMESTAMP:date_time} GMT %{YEAR:year}%{GREEDYDATA:millis}"}
    }

    # Re-assemble the pieces into a single string the date filter can parse.
    mutate {
      add_field => { "grokked_timestamp" => "%{day} %{date_time} %{year}" }
    }

    # Set @timestamp from the re-assembled string.
    date {
      match => ["grokked_timestamp", "EEE MMM dd HH:mm:ss yyyy"]
      # The source timestamp is GMT, but the zone token was dropped by the
      # grok above; without this the value would be interpreted in the
      # Logstash host's local time zone.
      timezone => "UTC"
      # Weekday/month names in the log are English; do not depend on the
      # host's default locale to parse them.
      locale => "en"
    }

    # Expand each key=value column into its own field. field_split is set to
    # "=" instead of its default (a space).
    # NOTE(review): presumably this keeps values that contain spaces from
    # being split into extra pairs - confirm against real log lines.
    kv {
      field_split => "="
      source => "bot_profile"
    }
    kv {
      field_split => "="
      source => "session_id"
    }
    kv {
      field_split => "="
      source => "profile_cnt"
    }
    kv {
      field_split => "="
      source => "ip"
    }
    kv {
      field_split => "="
      source => "url"
    }
    kv {
      field_split => "="
      source => "profile_id"
    }

    # Keep only the leading locale of New_bot_profile (stored as "Locale");
    # the remainder is captured as "garbage" and removed below.
    # LOCALE is a custom pattern loaded from patterns_dir; the path is
    # relative, so it resolves against Logstash's working directory.
    grok {
      patterns_dir => "./patterns"
      match => {"New_bot_profile" => "%{LOCALE:Locale}%{GREEDYDATA:garbage}"}
    }

    # Final clean-up. Within a single mutate, rename runs before convert, so
    # "ProfileCount" already exists when it is converted; remove_field is
    # applied after the mutate itself succeeds.
    # NOTE(review): New_bot_profile / sessionID / profileCount / profile are
    # presumably the keys emitted by the kv filters above - confirm.
    mutate {
      rename => {
        "ip" => "IP"
        "New_bot_profile" => "BotProfile"
        "sessionID" => "SessionId"
        "profileCount" => "ProfileCount"
        "profile" => "Profile"
      }
      convert => {"ProfileCount" => "integer"}
      remove_field => ["log_timestamp", "grokked_timestamp", "day", "date_time", "year", "millis", "request_hash", "bot_profile", "session_id", "profile_cnt", "url", "profile_id", "garbage"]
    }
  }
}
# Ship every event to the local Elasticsearch node and echo it to the
# console for debugging.
output {
  # Index documents into the "bots" index on the local node.
  elasticsearch {
    hosts => "127.0.0.1:9200"
    index => "bots"
    action => "index"
    # NOTE(review): per-output "workers" is deprecated in later Logstash
    # releases - confirm against the version in use.
    workers => 5
  }

  # Pretty-print each event to stdout.
  stdout {
    codec => rubydebug
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment