Skip to content

Instantly share code, notes, and snippets.

@avleen
Last active December 28, 2015 04:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avleen/7440270 to your computer and use it in GitHub Desktop.
Save avleen/7440270 to your computer and use it in GitHub Desktop.
{ "comment": "
## We have a lot of data (~2Tb/day), and ~12 ES nodes, so we set the number of
## shards to 10, to distribute the logs roughly evenly. If you have less data,
## use fewer shards.
## It doesn't really make sense to have more shards than you have servers.
##
## The index.routing.allocation.require.tag is used internally to make new
## indices on fast hardware. We run a nightly cron which moves older indices
## to slower hardware: https://gist.github.com/avleen/ecfb48f49d260b25e45e
##
## index.refresh_interval is 30s, so that we can bulk load logs quickly, and
## not have ES spend a lot of time refreshing the index.
##
## The analyzer is designed to not create tokens for really small terms, and
## to only tokenize unique terms.
##
## We don't want any dynamic string fields to be analyzed; the
## dynamic_templates help with this.
##
## We have some fields which are hashes, where the values are all numbers.
## We index these under the "byte_fields" object field, and set the
## mapping type to "integer" because we know they're short. This saves a lot of
## space compared to letting Elasticsearch map those by default to "long".
##
## Don't index any fields you don't need. E.g., "file" is set to not be indexed,
## because we can determine what we need based on "type" and other data.
##
## The "_all" field is disabled to save space, we only search on "message".
##
## IF YOU WANT TO USE THIS TEMPLATE, USE THE JSON BELOW, NOT THIS COMMENT.
##
"}
{
  "template": "logstash-*",
  "settings": {
    "number_of_shards": 10,
    "number_of_replicas": 0,
    "index.cache.field.type": "soft",
    "index.refresh_interval": "30s",
    "index.query.default_field": "message",
    "index.routing.allocation.require.tag": "recent",
    "analysis": {
      "filter": {
        "short_words": {
          "type": "length",
          "min": 4
        }
      },
      "analyzer": {
        "logstash_message": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "lowercase", "short_words", "unique"]
        }
      }
    }
  },
  "mappings": {
    "_default_": {
      "_all": { "enabled": false },
      "dynamic_templates": [
        {
          "string_template": {
            "match": "*",
            "match_mapping_type": "string",
            "mapping": { "type": "string", "index": "not_analyzed" }
          }
        },
        {
          "byte_template": {
            "path_match": "byte_fields.*",
            "mapping": { "type": "integer", "index": "not_analyzed" }
          }
        }
      ],
      "properties": {
        "message": { "type": "string", "index": "analyzed", "index_options": "docs", "analyzer": "logstash_message" },
        "offset": { "type": "long", "index": "no" },
        "file": { "type": "string", "index": "no" },
        "host": { "type": "string", "index": "not_analyzed" },
        "tags": { "type": "string", "index": "not_analyzed" },
        "@timestamp": { "type": "date", "index": "not_analyzed" },
        "timestamp": { "type": "string", "index": "no" },
        "index_timestamp": { "type": "date", "index": "not_analyzed", "format": "YYYY-MM-dd HH:mm:ss Z" },
        "type": { "type": "string", "index": "not_analyzed" },
        "X_Forwarded_For": { "type": "ip", "index": "not_analyzed" },
        "response_bytes": { "type": "long", "index": "not_analyzed" },
        "http_version": { "type": "float", "index": "not_analyzed" },
        "response": { "type": "integer", "index": "not_analyzed" },
        "php_memory_usage_bytes": { "type": "long", "index": "not_analyzed" },
        "php_time_microsec": { "type": "long", "index": "not_analyzed" },
        "php_utime_microsec": { "type": "long", "index": "not_analyzed" },
        "php_stime_microsec": { "type": "long", "index": "not_analyzed" },
        "apache_time_microsec": { "type": "long", "index": "not_analyzed" },
        "is_ssl": { "type": "integer", "index": "not_analyzed" },
        "http_hostname": { "type": "string", "index": "not_analyzed" },
        "http_method": { "type": "string", "index": "not_analyzed" },
        "byte_fields": {
          "type": "object",
          "dynamic": true,
          "path": "full"
        },
        "eventinate": {
          "type": "object",
          "dynamic": true,
          "path": "full"
        },
        "geoip": {
          "type": "object",
          "dynamic": true,
          "path": "full",
          "properties": {
            "area_code": { "type": "string", "index": "not_analyzed" },
            "city_name": { "type": "string", "index": "not_analyzed" },
            "continent_code": { "type": "string", "index": "not_analyzed" },
            "country_code2": { "type": "string", "index": "not_analyzed" },
            "country_code3": { "type": "string", "index": "not_analyzed" },
            "country_name": { "type": "string", "index": "not_analyzed" },
            "dma_code": { "type": "string", "index": "not_analyzed" },
            "ip": { "type": "ip", "index": "not_analyzed" },
            "latitude": { "type": "float", "index": "not_analyzed" },
            "longitude": { "type": "float", "index": "not_analyzed" },
            "metro_code": { "type": "float", "index": "not_analyzed" },
            "postal_code": { "type": "string", "index": "not_analyzed" },
            "region": { "type": "string", "index": "not_analyzed" },
            "region_name": { "type": "string", "index": "not_analyzed" },
            "timezone": { "type": "string", "index": "not_analyzed" }
          }
        }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment