spinscale/kibana.snippets

## kibana.snippets
###################################
## Introduction to Elasticsearch ##
###################################

#########################
## If you do not want to run Elasticsearch yourself
## Test a free 14 day trial at
## https://www.elastic.co/cloud/
#########################


##########################
## Indexing & Searching ##
##########################


# Index a single document under an explicit id
PUT books/_doc/1
{
  "title": "The Play Framework Cookbook",
  "category": "Computer & IT",
  "price": 34.99
}

# Index several documents with one bulk request
# (each document is an action line followed by a source line)
PUT books/_bulk
{"index": {"_id": "1"}}
{"title": "The Play Framework Cookbook", "category": "Computer & IT", "price": 34.99}
{"index": {"_id": "2"}}
{"title": "Database Internals: A Deep Dive Into How Distributed Data Systems Work", "category": "Computer & IT", "price": 44.99}
{"index": {"_id": "3"}}
{"title": "Ready Player One", "category": "Science Fiction", "price": 10.99}

# Fetch document 1 together with its metadata
GET books/_doc/1

# Fetch only the source of document 1
GET books/_source/1

# Delete document 1 ...
DELETE books/_doc/1

# ... and index it once more
PUT books/_doc/1
{ "title": "The Play Framework Cookbook", "category": "Computer & IT", "price": 34.99 }

# Partial update: add two fields to the existing document
POST books/_update/1
{
  "doc": {
    "release_date": "2011-08-11",
    "author": "Alexander Reelsen"
  }
}

# Check the result of the partial update
GET books/_doc/1

# Scripted update: reduce the price with a painless script
POST books/_update/1
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.price -= 1.55"
  }
}

# Check the result of the scripted update
GET books/_doc/1

# URI search: the query is passed in the q parameter
GET books/_search?q=play

# Search using the query DSL
GET books/_search
{
  "query": { "match": { "title": "play" } }
}

# Merge the index down to a single segment
POST books/_forcemerge?max_num_segments=1

# Search again, asking for a score explanation per hit
GET books/_search
{
  "query": { "match": { "title": "play" } },
  "explain": true
}

# Add a second book with a similar title
PUT books/_doc/4
{
  "title": "The Play Framework Cookbook (2nd edition)",
  "category": "Programming",
  "price": 38.99
}


# Bool query: the title has to match, the price range only filters,
# and the optional category clause contributes to the score
GET books/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "play" } }
      ],
      "filter": [
        { "range": { "price": { "gte": 30 } } }
      ],
      "should": [
        { "match": { "category.keyword": "Computer & IT" } }
      ]
    }
  }
}

##########################
## Aggregations ##########
##########################

# Bucket the books by category (size 0: return no hits, only aggregations)
GET books/_search
{
  "size": 0,
  "aggs": {
    "by_category": {
      "terms": { "field": "category.keyword", "size": 10 }
    }
  }
}

# Bucket by category and compute the average price per bucket
# Beware of floating point inaccuracies in the result!
GET books/_search
{
  "size": 0,
  "aggs": {
    "by_category": {
      "terms": { "field": "category.keyword", "size": 10 },
      "aggs": {
        "avg_price": {
          "avg": { "field": "price" }
        }
      }
    }
  }
}

# Add some monitoring data; no _id is given, so ids are autogenerated
PUT monitoring_data/_bulk
{ "index" : {} }
{ "duration_in_ms" : 123, "host" : "db01", "statement": "SELECT * from cars" }
{ "index" : {} }
{ "duration_in_ms" : 145, "host" : "db01", "statement": "SELECT * from cars" }
{ "index" : {} }
{ "duration_in_ms" : 185, "host" : "db01", "statement": "SELECT * from cars" }
{ "index" : {} }
{ "duration_in_ms" : 220, "host" : "db01", "statement": "SELECT * from cars" }
{ "index" : {} }
{ "duration_in_ms" : 450, "host" : "db02", "statement": "SELECT * from cars" }
{ "index" : {} }
{ "duration_in_ms" : 360, "host" : "db02", "statement": "SELECT * from cars" }

# Count the distinct hosts (cardinality aggregation)
GET monitoring_data/_search
{
  "size": 0,
  "aggs": {
    "number_of_hosts": {
      "cardinality": {
        "field": "host.keyword"
      }
    }
  }
}

# Percentiles of the statement duration across all hosts
# (the aggregation is named after what it computes, not after the host count)
GET monitoring_data/_search
{
  "size": 0,
  "aggs": {
    "duration_percentiles": {
      "percentiles": {
        "field": "duration_in_ms"
      }
    }
  }
}

# Group per host first and compute the percentiles per bucket,
# which clearly shows a performance issue with one of the
# database hosts
GET monitoring_data/_search
{
  "size": 0,
  "aggs": {
    "by_host": {
      "terms": {
        "field": "host.keyword",
        "size": 10
      },
      "aggs": {
        "duration_percentiles": {
          "percentiles": {
            "field": "duration_in_ms"
          }
        }
      }
    }
  }
}


#########################
## Analysis  ############
#########################


### Tokenizer

# The whitespace tokenizer splits on whitespace only
GET _analyze
{
  "tokenizer": "whitespace",
  "text": "quick brown fox"
}

# With the whitespace tokenizer, punctuation stays attached to the terms
GET _analyze
{
  "tokenizer": "whitespace",
  "text": "the lazy, white dog."
}

# See the punctuation magically being removed:
# the standard tokenizer follows UAX #29 (Unicode Text Segmentation).
# It is also used by default, so you can omit it
GET _analyze
{
  "tokenizer": "standard",
  "text": "the lazy, white dog."
}

# But the URL is broken into pieces
GET _analyze
{ "text": "this is an url https://www.jade-hs.de" }

# Fix the above issue by using the uax_url_email tokenizer
GET _analyze
{
  "tokenizer": "uax_url_email",
  "text": "this is an url https://www.jade-hs.de"
}


### Token Filter

# Baseline: standard tokenization without any token filter
GET _analyze
{
  "tokenizer": "standard",
  "text": "The Quick brown fox"
}

# Lowercase every term
GET _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase"],
  "text": "The Quick brown fox"
}

# Additionally remove stop words
GET _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "stop"],
  "text": "The Quick brown fox"
}

# Add an inline synonym filter that maps quick to fast
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "stop",
    { "type": "synonym", "synonyms": ["quick => fast"] }
  ],
  "text": "The Quick brown fox"
}

# Make sure that quick is kept as well by listing it on the right-hand side
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "stop",
    { "type": "synonym", "synonyms": ["quick => fast, quick"] }
  ],
  "text": "The Quick brown fox"
}


# Stemming
# The plural is removed, but look at the term experience,
# which is now indistinguishable from experiment:
# stemming can be overly aggressive
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "stop",
    { "type": "stemmer", "name": "english" }
  ],
  "text": "Waxolutionists - The smart blip experience"
}


# Decompounding
# Splits sub terms out of compound terms,
# which is important for lots of German words
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "stop",
    { "type": "dictionary_decompounder", "word_list": ["topf"] }
  ],
  "text": "Blumentopf"
}

# Over-decompounding happens: same word list, different input term
# A workaround might be the use of hyphenation patterns
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",
    "stop",
    { "type": "dictionary_decompounder", "word_list": ["topf"] }
  ],
  "text": "Verstopfung"
}


# asciifolding applied to both spellings of the same names
GET _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "stop", "asciifolding"],
  "text": [
    "München, Köln, Parkstraße",
    "Muenchen, Koeln, Parkstrasse"
  ]
}

# Or is german_normalization the better choice? Compare the output
GET _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "stop", "german_normalization"],
  "text": [
    "München, Köln, Parkstraße",
    "Muenchen, Koeln, Parkstrasse"
  ]
}


# phonetic analysis requires you to
# install the analysis-phonetic plugin
# left as an exercise to the reader

# Create an index with a custom analyzer configured for a certain field.
# The analyzer chains the standard tokenizer with the lowercase and stop
# token filters plus a custom synonym token filter (named *_filter because
# it is registered under analysis.filter, not as a tokenizer).
# NOTE: index creation is rejected if my_data already exists;
# run `DELETE my_data` first in that case.
PUT my_data
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "my_synonym_filter"
          ]
        }
      },
      "filter": {
        "my_synonym_filter": {
          "type": "synonym",
          "synonyms": [
            "quick => fast, quick"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "my_field": {
        "type": "text",
        "analyzer": "my_custom_analyzer"
      }
    }
  }
}

# Index a document and refresh so that it is searchable immediately
PUT my_data/_doc/1?refresh
{
  "my_field" : "quick brown fox"
}

# Search for the synonym
GET my_data/_search?q=fast

# Show the terms the analyzer of my_field produces for this text
GET my_data/_analyze
{
  "text": "quick brown fox",
  "field": "my_field"
}


##########################
## System overview #######
##########################

# Check out the static node configuration
GET _nodes

# Check out the dynamic node status
GET _nodes/stats

# Check out the dynamic node status, but human readable
GET _nodes/stats?human

# Plain-text, column-aligned index overview; ?v adds the header row
GET _cat/indices?v
	###################################
	## Introduction to Elasticsearch ##
	###################################

	#########################
	## If you do not want to run Elasticsearch yourself
	## Test a free 14 day trial at
	## https://www.elastic.co/cloud/
	#########################




	##########################
	## Indexing & Searching ##
	##########################


	# Index a single document under an explicit id
	PUT books/_doc/1
	{
	  "title": "The Play Framework Cookbook",
	  "category": "Computer & IT",
	  "price": 34.99
	}

	# Index several documents with one bulk request
	# (each document is an action line followed by a source line)
	PUT books/_bulk
	{"index": {"_id": "1"}}
	{"title": "The Play Framework Cookbook", "category": "Computer & IT", "price": 34.99}
	{"index": {"_id": "2"}}
	{"title": "Database Internals: A Deep Dive Into How Distributed Data Systems Work", "category": "Computer & IT", "price": 44.99}
	{"index": {"_id": "3"}}
	{"title": "Ready Player One", "category": "Science Fiction", "price": 10.99}

	# Fetch document 1 together with its metadata
	GET books/_doc/1

	# Fetch only the source of document 1
	GET books/_source/1

	# Delete document 1 ...
	DELETE books/_doc/1

	# ... and index it once more
	PUT books/_doc/1
	{ "title": "The Play Framework Cookbook", "category": "Computer & IT", "price": 34.99 }

	# Partial update: add two fields to the existing document
	POST books/_update/1
	{
	  "doc": {
	    "release_date": "2011-08-11",
	    "author": "Alexander Reelsen"
	  }
	}

	# Check the result of the partial update
	GET books/_doc/1

	# Scripted update: reduce the price with a painless script
	POST books/_update/1
	{
	  "script": {
	    "lang": "painless",
	    "source": "ctx._source.price -= 1.55"
	  }
	}

	# Check the result of the scripted update
	GET books/_doc/1

	# URI search: the query is passed in the q parameter
	GET books/_search?q=play

	# Search using the query DSL
	GET books/_search
	{
	  "query": { "match": { "title": "play" } }
	}

	# Merge the index down to a single segment
	POST books/_forcemerge?max_num_segments=1

	# Search again, asking for a score explanation per hit
	GET books/_search
	{
	  "query": { "match": { "title": "play" } },
	  "explain": true
	}

	# Add a second book with a similar title
	PUT books/_doc/4
	{
	  "title": "The Play Framework Cookbook (2nd edition)",
	  "category": "Programming",
	  "price": 38.99
	}


	# Bool query: the title has to match, the price range only filters,
	# and the optional category clause contributes to the score
	GET books/_search
	{
	  "query": {
	    "bool": {
	      "must": [
	        { "match": { "title": "play" } }
	      ],
	      "filter": [
	        { "range": { "price": { "gte": 30 } } }
	      ],
	      "should": [
	        { "match": { "category.keyword": "Computer & IT" } }
	      ]
	    }
	  }
	}

	##########################
	## Aggregations ##########
	##########################

	# Bucket the books by category (size 0: return no hits, only aggregations)
	GET books/_search
	{
	  "size": 0,
	  "aggs": {
	    "by_category": {
	      "terms": { "field": "category.keyword", "size": 10 }
	    }
	  }
	}

	# Bucket by category and compute the average price per bucket
	# Beware of floating point inaccuracies in the result!
	GET books/_search
	{
	  "size": 0,
	  "aggs": {
	    "by_category": {
	      "terms": { "field": "category.keyword", "size": 10 },
	      "aggs": {
	        "avg_price": {
	          "avg": { "field": "price" }
	        }
	      }
	    }
	  }
	}

	# Add some monitoring data; no _id is given, so ids are autogenerated
	# NOTE: because ids are autogenerated, running this request again adds
	# six more documents instead of overwriting the existing ones
	PUT monitoring_data/_bulk
	{ "index" : {} }
	{ "duration_in_ms" : 123, "host" : "db01", "statement": "SELECT * from cars" }
	{ "index" : {} }
	{ "duration_in_ms" : 145, "host" : "db01", "statement": "SELECT * from cars" }
	{ "index" : {} }
	{ "duration_in_ms" : 185, "host" : "db01", "statement": "SELECT * from cars" }
	{ "index" : {} }
	{ "duration_in_ms" : 220, "host" : "db01", "statement": "SELECT * from cars" }
	{ "index" : {} }
	{ "duration_in_ms" : 450, "host" : "db02", "statement": "SELECT * from cars" }
	{ "index" : {} }
	{ "duration_in_ms" : 360, "host" : "db02", "statement": "SELECT * from cars" }

	# Count the distinct hosts (cardinality aggregation)
	GET monitoring_data/_search
	{
	  "size": 0,
	  "aggs": {
	    "number_of_hosts": {
	      "cardinality": {
	        "field": "host.keyword"
	      }
	    }
	  }
	}

	# Percentiles of the statement duration across all hosts
	# (the aggregation is named after what it computes, not after the host count)
	GET monitoring_data/_search
	{
	  "size": 0,
	  "aggs": {
	    "duration_percentiles": {
	      "percentiles": {
	        "field": "duration_in_ms"
	      }
	    }
	  }
	}

	# Group per host first and compute the percentiles per bucket,
	# which clearly shows a performance issue with one of the
	# database hosts
	GET monitoring_data/_search
	{
	  "size": 0,
	  "aggs": {
	    "by_host": {
	      "terms": {
	        "field": "host.keyword",
	        "size": 10
	      },
	      "aggs": {
	        "duration_percentiles": {
	          "percentiles": {
	            "field": "duration_in_ms"
	          }
	        }
	      }
	    }
	  }
	}





	#########################
	## Analysis ############
	#########################


	### Tokenizer

	# The whitespace tokenizer splits on whitespace only
	GET _analyze
	{
	  "tokenizer": "whitespace",
	  "text": "quick brown fox"
	}

	# With the whitespace tokenizer, punctuation stays attached to the terms
	GET _analyze
	{
	  "tokenizer": "whitespace",
	  "text": "the lazy, white dog."
	}

	# See the punctuation magically being removed:
	# the standard tokenizer follows UAX #29 (Unicode Text Segmentation).
	# It is also used by default, so you can omit it
	GET _analyze
	{
	  "tokenizer": "standard",
	  "text": "the lazy, white dog."
	}

	# But the URL is broken into pieces
	GET _analyze
	{ "text": "this is an url https://www.jade-hs.de" }

	# Fix the above issue by using the uax_url_email tokenizer
	GET _analyze
	{
	  "tokenizer": "uax_url_email",
	  "text": "this is an url https://www.jade-hs.de"
	}


	### Token Filter

	# Baseline: standard tokenization without any token filter
	GET _analyze
	{
	  "tokenizer": "standard",
	  "text": "The Quick brown fox"
	}

	# Lowercase every term
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": ["lowercase"],
	  "text": "The Quick brown fox"
	}

	# Additionally remove stop words
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": ["lowercase", "stop"],
	  "text": "The Quick brown fox"
	}

	# Add an inline synonym filter that maps quick to fast
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "stop",
	    { "type": "synonym", "synonyms": ["quick => fast"] }
	  ],
	  "text": "The Quick brown fox"
	}

	# Make sure that quick is kept as well by listing it on the right-hand side
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "stop",
	    { "type": "synonym", "synonyms": ["quick => fast, quick"] }
	  ],
	  "text": "The Quick brown fox"
	}


	# Stemming
	# The plural is removed, but look at the term experience,
	# which is now indistinguishable from experiment:
	# stemming can be overly aggressive
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "stop",
	    { "type": "stemmer", "name": "english" }
	  ],
	  "text": "Waxolutionists - The smart blip experience"
	}


	# Decompounding
	# Splits sub terms out of compound terms,
	# which is important for lots of German words
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "stop",
	    { "type": "dictionary_decompounder", "word_list": ["topf"] }
	  ],
	  "text": "Blumentopf"
	}

	# Over-decompounding happens: same word list, different input term
	# A workaround might be the use of hyphenation patterns
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": [
	    "lowercase",
	    "stop",
	    { "type": "dictionary_decompounder", "word_list": ["topf"] }
	  ],
	  "text": "Verstopfung"
	}


	# asciifolding applied to both spellings of the same names
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": ["lowercase", "stop", "asciifolding"],
	  "text": [
	    "München, Köln, Parkstraße",
	    "Muenchen, Koeln, Parkstrasse"
	  ]
	}

	# Or is german_normalization the better choice? Compare the output
	GET _analyze
	{
	  "tokenizer": "standard",
	  "filter": ["lowercase", "stop", "german_normalization"],
	  "text": [
	    "München, Köln, Parkstraße",
	    "Muenchen, Koeln, Parkstrasse"
	  ]
	}


	# phonetic analysis requires you to
	# install the analysis-phonetic plugin
	# left as an exercise to the reader

	# Create an index with a custom analyzer configured for a certain field.
	# The analyzer chains the standard tokenizer with the lowercase and stop
	# token filters plus a custom synonym token filter (named *_filter because
	# it is registered under analysis.filter, not as a tokenizer).
	# NOTE: index creation is rejected if my_data already exists;
	# run `DELETE my_data` first in that case.
	PUT my_data
	{
	  "settings": {
	    "analysis": {
	      "analyzer": {
	        "my_custom_analyzer": {
	          "type": "custom",
	          "tokenizer": "standard",
	          "filter": [
	            "lowercase",
	            "stop",
	            "my_synonym_filter"
	          ]
	        }
	      },
	      "filter": {
	        "my_synonym_filter": {
	          "type": "synonym",
	          "synonyms": [
	            "quick => fast, quick"
	          ]
	        }
	      }
	    }
	  },
	  "mappings": {
	    "properties": {
	      "my_field": {
	        "type": "text",
	        "analyzer": "my_custom_analyzer"
	      }
	    }
	  }
	}

	# Index a document and refresh so that it is searchable immediately
	PUT my_data/_doc/1?refresh
	{
	  "my_field" : "quick brown fox"
	}

	# Search for the synonym
	GET my_data/_search?q=fast

	# Show the terms the analyzer of my_field produces for this text
	GET my_data/_analyze
	{
	  "text": "quick brown fox",
	  "field": "my_field"
	}




	##########################
	## System overview #######
	##########################

	# Check out the static node configuration
	GET _nodes

	# Check out the dynamic node status
	GET _nodes/stats

	# Check out the dynamic node status, but human readable
	GET _nodes/stats?human

	# Plain-text, column-aligned index overview; ?v adds the header row
	GET _cat/indices?v