-
-
Save httpstergeek/5fd08b9bc750e2d1954de78b063a092a to your computer and use it in GitHub Desktop.
Splunk bot Transforms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is posted as-is; use at your own risk.
# The matching props.conf entry might look something like this:
# REPORT-bots1 = google_bot,bot_bot1,bot_bot2,bing_bot,other_bot,exabot_bot,james_bot,spinner_bot,yeti_bot,baiduspider_bot,yahoo_bots,360Spider_bots,sogouspider_bots,robospider_bots,crystal_bot,apple_bot,spotxchange_bot,java_bot,facebook_bot,admantx_bot,voltron_bot,r_bot,genieo_bot,mech_bot,synthenesio_bot,apache_bot,hi_bot,pinterest_bot,ia_bot,webindex_bot,go_bot,curious_bot,curl_bot,twitter_bot,logic_bot,amazon_bot,kerrigan_bot,sta_bot,http_bot,python_bot,skype_bot,guzzle_bot,hub_bot,ning_bot,hotpads_bot,zend_bot,zon_bot,crows_bot,query_bot,wp_bot,fast_bot,meta_bot,ycache_bot,seos_bot,re_bot,cis_bot,ruby_bot,cake_bot,acc_bot,xplay_bot,php_bot,group_bot,ok_bot,typhoeus_bot,feed_bot,jak_bot,sfeed_bot,rcrwaler_bot,echo_bot,mercury_bot,nim_bot,scrapy_bot
# | |
# Field extractions that label crawler / bot traffic.
#
# Every stanza runs REGEX against the `useragent` field (SOURCE_KEY) and puts
# the text captured by the named group into a field called `bot`.
# Stanza names are referenced by name from props.conf (REPORT-<class> = ...),
# so they are kept exactly as published, including the misspelled ones
# (synthenesio_bot, rcrwaler_bot).
#
# Each stanza name is defined exactly once: java_bot and voltron_bot each had
# a second, identical definition that added nothing.

# --- Generic "<name>bot" tokens -------------------------------------------
[bot_bot1]
SOURCE_KEY = useragent
REGEX = (?<bot>[^\(\s;/]+[Bb]ot/v{0,1}\d([^\s;\)\(]{1,9}){0,1})
[bot_bot2]
SOURCE_KEY = useragent
REGEX = (?<bot>[^\s;/\(]+[Bb]ot-{0,1}([^\s\(;\)]+){0,1})

# --- Search engines and well-known crawlers -------------------------------
[bing_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>([^-]{1,7}-){0,1}bing([^\s;]{1,9}){0,1})
[apple_bot]
SOURCE_KEY = useragent
REGEX = \((?<bot>Applebot/[^;]+)
[google_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>([^-\s]{1,13}-){0,1}Google([^\s;\)]{1,16}){0,1})
[other_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>(?:YextBot|heritrix|GroupHigh)\S+)
[exabot_bot]
SOURCE_KEY = useragent
REGEX = \((?<bot>Exabot-Thumbnails)\)
[james_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>James BOT)
[spinner_bot]
SOURCE_KEY = useragent
REGEX = \((?<bot>Spinn3r \S+)\)
[yeti_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Yeti\S+)
[baiduspider_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Baiduspider[^;]+)
[yahoo_bots]
SOURCE_KEY = useragent
REGEX = (?<bot>Yahoo\!\sSlurp)
[360Spider_bots]
SOURCE_KEY = useragent
REGEX = (?<bot>360Spider)
[sogouspider_bots]
SOURCE_KEY = useragent
REGEX = (?<bot>Sogou web spider)
[robospider_bots]
SOURCE_KEY = useragent
REGEX = (?<bot>robospider)
[crystal_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>CrystalSemanticsBot)
[spotxchange_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>SpotXchange/[^\s]+)

# --- Libraries, tools and services identified by a product token ----------
# NOTE(review): several patterns below are unanchored prefixes (e.g. Java,
# ning, Go\s) and can match inside longer, unrelated tokens; confirm against
# real traffic before relying on them.
[java_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Java[^\s]+)
[facebook_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>facebook[^\s]+)
[admantx_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>ADmantX[^\s]+)
[voltron_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>voltron[^\s]+)
# Matches a user agent whose first token is the single letter R or r.
[r_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>R|r)\s
[genieo_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Genieo[^\s]+)
[mech_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>(WWW-){0,1}Mechanize[^\s]+)
[synthenesio_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Synthesio[^\s]+)
[apache_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Apache[^\s]+)
[hi_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>hi-api[^\s]+)
[pinterest_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Pinterest[^\s]+)
[ia_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>ia_archiver[^\s]+)
[webindex_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>WebIndex[^\s]+)
[go_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Go\s[^\s]+)
[curious_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Curious\sGeorge[^\s]+)
[curl_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>curl[^\s]+)
[twitter_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Twitter[^\s]+(\s[^\s]+){0,1})
[logic_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>LogicMonitor[^\s]+)
[amazon_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Amazon[^\s]+(\s[^\s]+){0,1})
[kerrigan_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Kerrigan[^\s]+)
[sta_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>SOFTWING_TEAR[^\s]+)
[http_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>HTTPClient[^\s]+)
[python_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Python[^\s;]+)
[skype_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Skype[^\s]+)
[guzzle_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Guzzle[^\s]+)
[hub_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>HubSpot[^\s]+)
[ning_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>ning[^\s]+)
[hotpads_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>hotpads[^\s]+)
[zend_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Zend_Htt[^\s]+)
[zon_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Zon[^\s]+)
[crows_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Crowsnest[^\s]+)
[query_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>QuerySee[^\s]+)
[wp_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>WordPres[^\s;]+)
[fast_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>FAST\sEnterprise\sCrawler[^\s]+)
[meta_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>MetaURI\s[^\s]+)
[ycache_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>YahooCach[^\s]+)
[seos_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>SEOstats\s[^\s]+)
[re_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Domain\sRe[^\s]+)
[cis_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>cis[^\s]+)
[ruby_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Rub[^\s]+)
# Prefix is "CakePH" so that the trailing [^\s]+ picks up the final "P" and
# any version suffix of a "CakePHP..." token.
[cake_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>CakePH[^\s]+)
[acc_bot]
SOURCE_KEY = useragent
REGEX = (?<bot>Accountable[^\s]+)
[xplay_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>XPlay[^\s]+)
[php_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>PHPCraw[^\s]+)
[group_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>GroupMe[^\s]+)
[ok_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>okhtt[^\s]+)
[typhoeus_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Typhoe[^\s]+)
[feed_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Feed[^\s]+)
[jak_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Jakart[^\s]+)
[sfeed_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Superfeed[^\s]+)
[rcrwaler_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>RavenCrawl[^\s]+)
[echo_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Echop[^\s]+)
[mercury_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Mercu[^\s]+)
[nim_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>NimBUS[^\s]+)
[scrapy_bot]
SOURCE_KEY = useragent
REGEX = ^(?<bot>Scrapy[^\s]+)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I'm new to Splunk.
Could you explain in more detail how you implemented this? I copied transforms.conf to my $SPLUNK_HOME/etc/system/local with the same content as yours, but nothing has changed.
Thanks in advance