Skip to content

Instantly share code, notes, and snippets.

@endrit-b
Last active April 26, 2018 22:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save endrit-b/f3c9d727db8de8f08216e104ddd7ea94 to your computer and use it in GitHub Desktop.
Save endrit-b/f3c9d727db8de8f08216e104ddd7ea94 to your computer and use it in GitHub Desktop.
ElasticSearch 5.x - A practical overview of ES features
# Hit the root endpoint of the cluster
GET /

# Index one document with id 1 into index "my-index", type "my-doc"
POST /my-index/my-doc/1
{
  "body": "foo"
}

# Search the same index/type that was written to above
# (the path must use the same hyphenated names as the POST).
# NOTE: the only document indexed above has body "foo", so a match
# on "bar" is not expected to return it.
GET /my-index/my-doc/_search
{
  "query": {
    "match": {
      "body": "bar"
    }
  }
}
# Create the "library" index: a single shard, no replicas
PUT /library
{
  "settings": {
    "index.number_of_shards": 1,
    "index.number_of_replicas": 0
  }
}
#__________________________
#
# Bulk indexing and Search
#
#__________________________
#
# When you have a lot of docs to index, use the bulk API of ES.
# The body below alternates an action line ({"index": ...} carrying an
# explicit _id) with the document to index under that id.
# Keep every JSON object on its own single line, as written here.
POST /library/books/_bulk
{"index": {"_id": 1}}
{"title": "The quick brown fox", "price": 5, "colors": ["red", "green", "blue"]}
{"index": {"_id": 2}}
{"title": "The quick brown fox jumps over the lazy dog", "price": 15, "colors": ["blue", "yellow"]}
{"index": {"_id": 3}}
{"title": "The quick brown fox jumps over the lazy dog", "price": 8, "colors": ["red", "blue"]}
{"index": {"_id": 4}}
{"title": "Brown fox brown dog", "price": 2, "colors": ["black", "yellow", "red", "blue"]}
{"index": {"_id": 5}}
{"title": "Lazy dog", "price": 9, "colors": ["red", "blue", "green"]}
# Search with no request body (no query constraints given)
GET library/books/_search

# Match the word 'fox' in the title
GET library/books/_search
{
  "query": {
    "match": {
      "title": "fox"
    }
  }
}

# What about 'quick' and 'dog' in one match query?
GET library/books/_search
{
  "query": {
    "match": {
      "title": "quick dog"
    }
  }
}

# A stricter search: match the phrase 'quick brown'
GET library/books/_search
{
  "query": {
    "match_phrase": {
      "title": "quick brown"
    }
  }
}

# Hits are ranked by their relevance score (_score)
GET library/books/_search
{
  "query": {
    "match": {
      "title": "quick"
    }
  }
}
#_____________________
# Query clauses can be combined with a bool query
#
# must: find all docs with "quick" and the phrase "lazy dog"
GET /library/books/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title": "quick"
          }
        },
        {
          "match_phrase": {
            "title": "lazy dog"
          }
        }
      ]
    }
  }
}

#___________________________________________
# must_not: negate the query clauses instead
GET /library/books/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "match": {
            "title": "lazy"
          }
        },
        {
          "match_phrase": {
            "title": "quick dog"
          }
        }
      ]
    }
  }
}

#___________________________________________________
# should: clauses can be boosted for different effects
# (here the "lazy dog" phrase clause carries a boost of 3)
GET /library/books/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "quick dog"
            }
          }
        },
        {
          "match_phrase": {
            "title": {
              "query": "lazy dog",
              "boost": 3
            }
          }
        }
      ]
    }
  }
}
#__________________________
# To make matches stand out on the UI side, we can ask for
# the matched terms to be highlighted (here: in the title field).
# The query part is the same boosted should-query as before.
GET /library/books/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "quick dog"
            }
          }
        },
        {
          "match_phrase": {
            "title": {
              "query": "lazy dog",
              "boost": 3
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "title": {}
    }
  }
}
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-highlighting.html
#___________________________________
# We can also perform filtering.
# Filtering is faster than querying.
#
# Titles matching "dog", restricted to prices from 5 to 10 (inclusive bounds: gte/lte)
GET /library/books/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title": "dog"
          }
        }
      ],
      "filter": {
        "range": {
          "price": {
            "gte": 5,
            "lte": 10
          }
        }
      }
    }
  }
}

# A filter can also be used on its own, without any query clause
GET /library/books/_search
{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "price": {
            "gte": 5
          }
        }
      }
    }
  }
}
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
#_________________________________________________________
# How did that work?
# We need to know and understand how to tune Elasticsearch
# in order to make it search in a "managed" manner.
#
# The _analyze API
# Analysis = tokenization + token filters
GET /library/_analyze
{
  "tokenizer": "standard",
  "text": "Brown fox brown dog"
}

# Token filters can manipulate these tokens
GET /library/_analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase"],
  "text": "Brown fox brown dog"
}

# There is a wide array of token filters
GET /library/_analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "unique"],
  "text": "Brown fox brown dog"
}

#___________________________________________________
# A tokenizer + 0 or more token filters = an analyzer
GET /library/_analyze
{
  "analyzer": "standard",
  "text": "Brown fox brown dog"
}

#___________________________________________________
# Understanding analysis is very important, because
# the emitted tokens can significantly change the results;
# it helps your queries be more relevant.
# Same text and filter, two different tokenizers:
GET /library/_analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase"],
  "text": "ThE quick.brown_FOx Jumped! $19.95 @ 3.0"
}

GET /library/_analyze
{
  "tokenizer": "letter",
  "filter": ["lowercase"],
  "text": "ThE quick.brown_FOx Jumped! $19.95 @ 3.0"
}

# Another example is the uax_url_email tokenizer.
# Both requests analyze the exact same text so the outputs are comparable.
GET /library/_analyze
{
  "tokenizer": "standard",
  "text": "example@example.com website: https://elastic.co"
}

GET /library/_analyze
{
  "tokenizer": "uax_url_email",
  "text": "example@example.com website: https://elastic.co"
}
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/analysis.html
#_____________________________________
# Aggregations let you explore your data
# and extract insights from it.
#
# Terms aggregation on colors.keyword, with "size" set to 0
GET /library/books/_search
{
  "size": 0,
  "aggs": {
    "popular-colors": {
      "terms": {
        "field": "colors.keyword"
      }
    }
  }
}

# Aggregations can be used alongside a search query
GET /library/books/_search
{
  "query": {
    "match": {
      "title": "dog"
    }
  },
  "aggs": {
    "popular-colors": {
      "terms": {
        "field": "colors.keyword"
      }
    }
  }
}

# Aggregations can be nested and used to perform calculations:
# an avg over "price" nested under each "popular-colors" term
GET /library/books/_search
{
  "size": 0,
  "aggs": {
    "popular-colors": {
      "terms": {
        "field": "colors.keyword"
      },
      "aggs": {
        "avg-price-per-color": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}
# A document can be updated at any time by re-indexing it
# (here: the full document is sent again for id 4)
POST /library/books/4
{
  "title": "The Brown fox and a brown dog",
  "price": 12,
  "colors": ["black", "red", "blue"]
}

# ...or, for partial updates, through the _update API
# (only the "price" field is sent)
POST /library/books/4/_update
{
  "doc": {
    "price": 6
  }
}

# Fetch document 4 again to inspect the result
GET /library/books/4
#____________________________________________________
# Elasticsearch dynamically defines the index schema
# when documents are indexed - it tries to infer the data types
GET /library/books/_mapping

# We can define the mapping (schema) ourselves when we create the index.
# This index also registers a custom analyzer ("my-desc-analyzer":
# uax_url_email tokenizer + lowercase filter) used by the "description" field.
PUT /famous-librarians
{
  "settings": {
    "index": {
      "number_of_shards": 2,
      "number_of_replicas": 0,
      "analysis": {
        "analyzer": {
          "my-desc-analyzer": {
            "type": "custom",
            "tokenizer": "uax_url_email",
            "filter": ["lowercase"]
          }
        }
      }
    }
  },
  "mappings": {
    "librarian": {
      "properties": {
        "name": {
          "type": "text"
        },
        "fave-colors": {
          "type": "keyword"
        },
        "birth-date": {
          "type": "date",
          "format": "year_month_day"
        },
        "hometown": {
          "type": "geo_point"
        },
        "description": {
          "type": "text",
          "analyzer": "my-desc-analyzer"
        }
      }
    }
  }
}

GET /famous-librarians/_mapping

# Index a librarian. The field names must match the ones declared in the
# mapping above ("fave-colors", "birth-date", "hometown"); otherwise the
# explicitly mapped fields are left unused.
PUT /famous-librarians/librarian/1
{
  "name": "Filan Fisteku",
  "fave-colors": ["yellow", "black"],
  "birth-date": "1877-11-11",
  "hometown": {
    "lat": 32.456478,
    "lon": -87.421111
  },
  "description": "Pioneered the establishment of libraries in Balkans - filan.fisteku@fiction.com"
}

GET /famous-librarians/librarian/1
# Cleanup - be careful when you do this:
# these requests delete the "library" and "famous-librarians" indices.
DELETE /library
DELETE /famous-librarians
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment