Last active
December 28, 2015 04:09
-
-
Save avleen/7440270 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ "comment": " | |
## We have a lot of data (~2Tb/day), and ~12 ES nodes, so we set the number of | |
## shards to 10, to distribute the logs roughly evenly. If you have less data, | |
## use fewer shards. | |
## It doesn't really make sense to have more shards than you have servers. | |
## | |
## The index.routing.allocation.require.tag is used internally to make new | |
## indices on fast hardware. We run a nightly cron which moves older indices | |
## to slower hardware: https://gist.github.com/avleen/ecfb48f49d260b25e45e | |
## | |
## index.refresh_interval is 30s, so that we can bulk load logs quickly, and | |
## not have ES spend a lot of time refreshing the index. | |
## | |
## The analyzer is designed to not create tokens for really small terms, and | |
## to only tokenize unique terms. | |
## | |
## We don't want any dynamic string fields to be analyzed; the | |
## dynamic_templates help with this. | |
## | |
## We have some fields which are hashes, where the values are all numbers. | |
## We index them under the "byte_fields" object field, and set the | |
## mapping type to "integer" because we know they're short. This saves a lot of | |
## space compared to letting ElasticSearch map those by default to "long". | |
## | |
## Don't index any fields you don't need. Eg, "file" is set to not be indexed, | |
## because we can determine what we need based on "type" and other data. | |
## | |
## The "_all" field is disabled to save space; we only search on "message". | |
## | |
## IF YOU WANT TO USE THIS TEMPLATE, USE THE JSON BELOW, NOT THIS COMMENT. | |
## | |
"} | |
{
  "template": "logstash-*",
  "settings": {
    "number_of_shards": 10,
    "number_of_replicas": 0,
    "index.cache.field.type": "soft",
    "index.refresh_interval": "30s",
    "index.query.default_field": "message",
    "index.routing.allocation.require.tag": "recent",
    "analysis": {
      "filter": {
        "short_words": {
          "min": 4,
          "type": "length"
        }
      },
      "analyzer": {
        "logstash_message": {
          "filter": [
            "standard",
            "lowercase",
            "short_words",
            "unique"
          ],
          "type": "custom",
          "tokenizer": "standard"
        }
      }
    }
  },
  "mappings": {
    "_default_": {
      "_all": { "enabled": false },
      "dynamic_templates": [
        {
          "string_template": {
            "match": "*",
            "mapping": { "type": "string", "index": "not_analyzed" },
            "match_mapping_type": "string"
          }
        },
        {
          "byte_template": {
            "path_match": "byte_fields.*",
            "mapping": { "type": "integer", "index": "not_analyzed" }
          }
        }
      ],
      "properties": {
        "message": { "type": "string", "index": "analyzed", "index_options": "docs", "analyzer": "logstash_message" },
        "offset": { "type": "long", "index": "no" },
        "file": { "type": "string", "index": "no" },
        "host": { "type": "string", "index": "not_analyzed" },
        "tags": { "type": "string", "index": "not_analyzed" },
        "@timestamp": { "type": "date", "index": "not_analyzed" },
        "timestamp": { "type": "string", "index": "no" },
        "index_timestamp": { "type": "date", "index": "not_analyzed", "format": "YYYY-MM-dd HH:mm:ss Z" },
        "type": { "type": "string", "index": "not_analyzed" },
        "X_Forwarded_For": { "type": "ip", "index": "not_analyzed" },
        "response_bytes": { "type": "long", "index": "not_analyzed" },
        "http_version": { "type": "float", "index": "not_analyzed" },
        "response": { "type": "integer", "index": "not_analyzed" },
        "php_memory_usage_bytes": { "type": "long", "index": "not_analyzed" },
        "php_time_microsec": { "type": "long", "index": "not_analyzed" },
        "php_utime_microsec": { "type": "long", "index": "not_analyzed" },
        "php_stime_microsec": { "type": "long", "index": "not_analyzed" },
        "apache_time_microsec": { "type": "long", "index": "not_analyzed" },
        "is_ssl": { "type": "integer", "index": "not_analyzed" },
        "http_hostname": { "type": "string", "index": "not_analyzed" },
        "http_method": { "type": "string", "index": "not_analyzed" },
        "byte_fields": {
          "type": "object",
          "dynamic": true,
          "path": "full"
        },
        "eventinate": {
          "type": "object",
          "dynamic": true,
          "path": "full"
        },
        "geoip": {
          "type": "object",
          "dynamic": true,
          "path": "full",
          "properties": {
            "area_code": { "type": "string", "index": "not_analyzed" },
            "city_name": { "type": "string", "index": "not_analyzed" },
            "continent_code": { "type": "string", "index": "not_analyzed" },
            "country_code2": { "type": "string", "index": "not_analyzed" },
            "country_code3": { "type": "string", "index": "not_analyzed" },
            "country_name": { "type": "string", "index": "not_analyzed" },
            "dma_code": { "type": "string", "index": "not_analyzed" },
            "ip": { "type": "ip", "index": "not_analyzed" },
            "latitude": { "type": "float", "index": "not_analyzed" },
            "longitude": { "type": "float", "index": "not_analyzed" },
            "metro_code": { "type": "float", "index": "not_analyzed" },
            "postal_code": { "type": "string", "index": "not_analyzed" },
            "region": { "type": "string", "index": "not_analyzed" },
            "region_name": { "type": "string", "index": "not_analyzed" },
            "timezone": { "type": "string", "index": "not_analyzed" }
          }
        }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment