nukemberg/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Logs index template for Elasticsearch

Mapping templates

Dynamic mapping templates have been defined for various field usage. Select the correct mapping for your field by prepending the specified prefix.
Example: a field named text_message will be mapped as an analyzed text field, while a field named NI_request_tag will not be indexed. Prefixes must be written exactly as listed below (e.g. NI_, not ni_).
The default mapping template for string fields is keyword.
In many cases you might want to modify the prefixes to match the current naming scheme in your code, e.g. I frequently change ID_* to *Id to accommodate field names like requestId. In typed languages like Java/Scala you could have the serializer emit fields with appropriate prefixes for you.
Prefixes


text_ - an analyzed text field using the default analyzer. Use this if you want to search on individual words or tokens in this field.
path_ - this field is a / separated hierarchical path, e.g. a file path or a URL path, and you want to match all documents in a hierarchy. What this means is that for a value of /a/b/c the document will be found when searching for /a, /a/b or /a/b/c. This mapping will also generate another field with the .keyword suffix for use in aggregations.
noAgg_ - this field is a keyword that can be searched but cannot be aggregated - use this for things like request/session IDs but not for owner/user IDs, as they are likely to be used in terms aggregations. By turning off aggregation we save disk space and improve performance.
tk_ - this field is both a text field and also needs to be available for keyword search and aggregations. This mapping will generate 2 fields - the original plus a field with the .keyword suffix which is a keyword field. E.g. a field named tk_username will generate a text field named tk_username and a keyword field named tk_username.keyword
ki_ - this is a case insensitive keyword. Use this for tokens which should be case insensitive, e.g. log levels
ip_ - this is an IP address, use this for IPv4 or IPv6 data on which you want to search by subnet ranges
n_ - this is an integer field
l_ - this is a long integer field
f_ - this is a floating point number
d_ - this is a double precision floating point number
NI_ - this field is not indexed, it will show up in the document but will not be available for searching or aggregation. Turning off indexing saves disk space and indexing overhead, especially on large text fields.
ID_ - ID field, a keyword without doc values because of high cardinality and lack of ordering

The pipeline

To make logging as simple as possible, an ingest pipeline can be used. This allows managing the index name, rotation policy, date format parsing, etc. on the server side, which is useful if you want to log directly to Elasticsearch using HTTP requests or without a complex log transport pipeline.

  
## log-index-template.json
{
  "version": 1,
  "index_patterns": [
    "logs-*"
  ],
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "path_analyzer": {
          "tokenizer": "path_hierarchy"
        }
      },
      "normalizer": {
        "case_insensitive_upper": {
          "type": "custom",
          "char_filter": [],
          "filter": [
            "uppercase"
          ]
        }
      }
    }
  },
  "mappings": {
    "_source": {
      "enabled": true
    },
    "dynamic_templates": [
      {
        "analyzed_text": {
          "match_mapping_type": "string",
          "match": "text_*",
          "mapping": {
            "type": "text"
          }
        }
      },
      {
        "path": {
          "match_mapping_type": "string",
          "match": "path_*",
          "mapping": {
            "type": "text",
            "analyzer": "path_analyzer",
            "search_analyzer": "keyword",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            }
          }
        }
      },
      {
        "not_aggregated": {
          "match_mapping_type": "string",
          "match": "noAgg_*",
          "mapping": {
            "type": "keyword",
            "doc_values": false
          }
        }
      },
      {
        "text+keyword": {
          "match_mapping_type": "string",
          "match": "tk_*",
          "mapping": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      {
        "case_insensitive_keyword": {
          "match_mapping_type": "string",
          "match": "ki_*",
          "mapping": {
            "type": "keyword",
            "normalizer": "case_insensitive_upper"
          }
        }
      },
      {
        "ip_address": {
          "match_mapping_type": "string",
          "match": "ip_*",
          "mapping": {
            "type": "ip"
          }
        }
      },
      {
        "double_as_float": {
          "match_mapping_type": "double",
          "match": "f_*",
          "mapping": {
            "type": "float"
          }
        }
      },
      {
        "long_as_float": {
          "match_mapping_type": "long",
          "match": "f_*",
          "mapping": {
            "type": "float"
          }
        }
      },
      {
        "double": {
          "match_mapping_type": "double",
          "match": "d_*",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "long_as_double": {
          "match_mapping_type": "long",
          "match": "d_*",
          "mapping": {
            "type": "double"
          }
        }
      },
      {
        "long_as_integer": {
          "match_mapping_type": "long",
          "match": "n_*",
          "mapping": {
            "type": "integer"
          }
        }
      },
      {
        "long": {
          "match_mapping_type": "long",
          "match": "l_*",
          "mapping": {
            "type": "long"
          }
        }
      },
      {
        "not_indexed": {
          "match_mapping_type": "*",
          "match": "NI_*",
          "mapping": {
            "index": false
          }
        }
      },
      {
        "ids": {
          "match_mapping_type": "string",
          "match": "ID_*",
          "mapping": {
            "type": "keyword",
            "doc_values": false
          }
        }
      },
      {
        "error_props": {
          "path_match": "error.*",
          "mapping": {
            "index": false
          }
        }
      },
      {
        "default": {
          "match_mapping_type": "string",
          "mapping": {
            "type": "keyword"
          }
        }
      }
    ],
    "properties": {
      "extra": {
        "type": "object",
        "enabled": false
      },
      "service": {
        "type": "keyword",
        "normalizer": "case_insensitive_upper"
      },
      "timestamp": {
        "type": "date"
      },
      "@timestamp": {
        "type": "date"
      },
      "level": {
        "type": "keyword",
        "normalizer": "case_insensitive_upper"
      },
      "message": {
        "type": "text"
      },
      "stack": {
        "type": "text"
      },
      "error": {
        "type": "object",
        "properties": {
          "stack": {
            "type": "text"
          },
          "message": {
            "type": "text"
          }
        }
      }
    }
  }
}

## pipeline-monthly.json
{
    "description" : "Handle log messages, send to timebased index",
    "processors" : [
        {
            "date" : {
                "field" : "timestamp",
                "target_field" : "timestamp",
                "formats" : ["ISO8601", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "UNIX_MS", "UNIX"]
            }
        },
        {
            "date_index_name" : {
                "field" : "timestamp",
                "index_name_prefix" : "logs-",
                "date_rounding" : "M",
                "index_name_format": "yyyy.MM",
                "date_formats": ["ISO8601", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "UNIX_MS", "UNIX"]
            }
        }
    ]
}
	{
	"version": 1,
	"index_patterns": [
	"logs-*"
	],
	"settings": {
	"number_of_shards": 3,
	"number_of_replicas": 1,
	"analysis": {
	"analyzer": {
	"path_analyzer": {
	"tokenizer": "path_hierarchy"
	}
	},
	"normalizer": {
	"case_insensitive_upper": {
	"type": "custom",
	"char_filter": [],
	"filter": [
	"uppercase"
	]
	}
	}
	}
	},
	"mappings": {
	"_source": {
	"enabled": true
	},
	"dynamic_templates": [
	{
	"analyzed_text": {
	"match_mapping_type": "string",
	"match": "text_*",
	"mapping": {
	"type": "text"
	}
	}
	},
	{
	"path": {
	"match_mapping_type": "string",
	"match": "path_*",
	"mapping": {
	"type": "text",
	"analyzer": "path_analyzer",
	"search_analyzer": "keyword",
	"fields": {
	"keyword": {
	"type": "keyword"
	}
	}
	}
	}
	},
	{
	"not_aggregated": {
	"match_mapping_type": "string",
	"match": "noAgg_*",
	"mapping": {
	"type": "keyword",
	"doc_values": false
	}
	}
	},
	{
	"text+keyword": {
	"match_mapping_type": "string",
	"match": "tk_*",
	"mapping": {
	"type": "text",
	"fields": {
	"keyword": {
	"type": "keyword",
	"ignore_above": 256
	}
	}
	}
	}
	},
	{
	"case_insensitive_keyword": {
	"match_mapping_type": "string",
	"match": "ki_*",
	"mapping": {
	"type": "keyword",
	"normalizer": "case_insensitive_upper"
	}
	}
	},
	{
	"ip_address": {
	"match_mapping_type": "string",
	"match": "ip_*",
	"mapping": {
	"type": "ip"
	}
	}
	},
	{
	"double_as_float": {
	"match_mapping_type": "double",
	"match": "f_*",
	"mapping": {
	"type": "float"
	}
	}
	},
	{
	"long_as_float": {
	"match_mapping_type": "long",
	"match": "f_*",
	"mapping": {
	"type": "float"
	}
	}
	},
	{
	"double": {
	"match_mapping_type": "double",
	"match": "d_*",
	"mapping": {
	"type": "double"
	}
	}
	},
	{
	"long_as_double": {
	"match_mapping_type": "long",
	"match": "d_*",
	"mapping": {
	"type": "double"
	}
	}
	},
	{
	"long_as_integer": {
	"match_mapping_type": "long",
	"match": "n_*",
	"mapping": {
	"type": "integer"
	}
	}
	},
	{
	"long": {
	"match_mapping_type": "long",
	"match": "l_*",
	"mapping": {
	"type": "long"
	}
	}
	},
	{
	"not_indexed": {
	"match_mapping_type": "*",
	"match": "NI_*",
	"mapping": {
	"index": false
	}
	}
	},
	{
	"ids": {
	"match_mapping_type": "string",
	"match": "ID_*",
	"mapping": {
	"type": "keyword",
	"doc_values": false
	}
	}
	},
	{
	"error_props": {
	"path_match": "error.*",
	"mapping": {
	"index": false
	}
	}
	},
	{
	"default": {
	"match_mapping_type": "string",
	"mapping": {
	"type": "keyword"
	}
	}
	}
	],
	"properties": {
	"extra": {
	"type": "object",
	"enabled": false
	},
	"service": {
	"type": "keyword",
	"normalizer": "case_insensitive_upper"
	},
	"timestamp": {
	"type": "date"
	},
	"@timestamp": {
	"type": "date"
	},
	"level": {
	"type": "keyword",
	"normalizer": "case_insensitive_upper"
	},
	"message": {
	"type": "text"
	},
	"stack": {
	"type": "text"
	},
	"error": {
	"type": "object",
	"properties": {
	"stack": {
	"type": "text"
	},
	"message": {
	"type": "text"
	}
	}
	}
	}
	}
	}
	{
	"description" : "Handle log messages, send to timebased index",
	"processors" : [
	{
	"date" : {
	"field" : "timestamp",
	"target_field" : "timestamp",
	"formats" : ["ISO8601", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "UNIX_MS", "UNIX"]
	}
	},
	{
	"date_index_name" : {
	"field" : "timestamp",
	"index_name_prefix" : "logs-",
	"date_rounding" : "M",
	"index_name_format": "yyyy.MM",
	"date_formats": ["ISO8601", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "UNIX_MS", "UNIX"]
	}
	}
	]
	}