Created
January 6, 2017 10:48
-
-
Save jsvd/cafccdcf20bd30969ed8419c8ae9a573 to your computer and use it in GitHub Desktop.
Demo code for a presentation about Elasticsearch by João Duarte at the Elastic Meetup Lisboa, 2017-01-05
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Document Analysis
# tokenizers / analyzers to try: standard, letter, whitespace, ngram

# tokenizer only: break the sample text up with the ngram tokenizer
POST _analyze
{
  "tokenizer": "ngram",
  "text": "Filipe's shoes, yo."
}

# full analyzer: same text through the standard analyzer
POST _analyze
{
  "analyzer": "standard",
  "text": "Filipe's shoes, yo."
}

# drop my_index, then analyze text with the index-level my_custom_analyzer
# (that analyzer is defined by the PUT my_index requests further down)
DELETE my_index

POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "He who controls the spice, controls the Universe."
}
# simple custom analyzer
# standard tokenizer only; no char filters and no token filters yet
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": []
        }
      }
    }
  }
}
# custom analyzer with a few filters
# token filters are listed in this order: lowercase, then stop
PUT my_index/
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop"]
        }
      }
    }
  }
}
# custom analyzer with stemmer
# same as the previous step, with "stemmer" appended to the token filter chain
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop", "stemmer"]
        }
      }
    }
  }
}
# custom analyzer with char filters
# "emoticons" is the mapping char filter declared under analysis.char_filter below
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": ["html_strip", "emoticons"],
          "tokenizer": "standard",
          "filter": ["lowercase", "stemmer", "stop"]
        }
      },
      "char_filter": {
        "emoticons": {
          "type": "mapping",
          "mappings": [":) => _happy_", ":( => _sad_"]
        }
      }
    }
  }
}

# try the analyzer on text containing HTML tags and an emoticon
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "<p>He who controls the spice, controls the universe :)</p>"
}
GET my_index/_settings

# Indexing
# see how inverted index would be created:
# analyze each of the three sample sentences with my_custom_analyzer
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "He who controls the spice, controls the universe."
}

POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "A mad man sees what he sees."
}

POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "What if a mad man controlled the universe?"
}
GET my_index/_mapping

# map the "text" field of type my_type to my_custom_analyzer
PUT my_index/_mapping/my_type
{
  "properties": {
    "text": {
      "type": "string",
      "analyzer": "my_custom_analyzer"
    }
  }
}

## reminder: first create index w/ analyzer and add mapping
# index documents
# _type is "my_type" so the documents go into the type the mapping above was
# declared for (it was "mytype", which created a second, undeclared type).
# Each bulk action line and each document must stay on a single line.
POST _bulk
{ "index": { "_index": "my_index", "_type": "my_type" } }
{ "text": "He who controls the spice, controls the universe." }
{ "index": { "_index": "my_index", "_type": "my_type" } }
{ "text": "A mad man sees what he sees." }
{ "index": { "_index": "my_index", "_type": "my_type" } }
{ "text": "What if a mad man controlled the universe?" }
# look at the documents
GET my_index/_search

# search for Mad
GET my_index/_search?q=Mad

GET _cat/indices

# match query (?explain adds the score explanation to each hit)
GET my_index/_search?explain
{
  "query": {
    "match": {
      "text": { "query": "controllable spice" }
    }
  }
}

# match_phrase query
GET my_index/_search
{
  "query": {
    "match_phrase": {
      "text": {
        "query": "control spice",
        "slop": 1
      }
    }
  }
}

# query time analyzer: difference between standard vs my_custom_analyzer
# switch "analyzer" between "standard" and "my_custom_analyzer" to compare results;
# the name must be a defined analyzer (it was the truncated "my_custom_analyz")
GET my_index/_search
{
  "query": {
    "match": {
      "text": {
        "query": "universal",
        "analyzer": "my_custom_analyzer"
      }
    }
  }
}
GET news/_search

# score
GET news/_search
{
  "query": {
    "match": { "title": "donald trump jimmy fallon hillary clinton" }
  }
}

# relevancy boosting
# default boost is 1
# here the title clause is boosted to 2; the content clause keeps the default
GET news/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": { "query": "best movies netflix", "boost": 2 }
          }
        },
        {
          "match": {
            "content": { "query": "best movies netflix" }
          }
        }
      ],
      "filter": [
        {
          "term": { "media-type": "news" }
        }
      ]
    }
  }
}
# news source boosting
# must: title OR content has to match the query text
# should: optional source clauses; "Tech Investor News" carries boost 2
GET news/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "match": {
                  "title": { "query": "best movies netflix" }
                }
              },
              {
                "match": {
                  "content": { "query": "best movies netflix" }
                }
              }
            ]
          }
        }
      ],
      "should": [
        {
          "match_phrase": {
            "source": { "query": "Tech Investor News", "boost": 2 }
          }
        },
        {
          "match_phrase": {
            "source": { "query": "TechRadar" }
          }
        }
      ]
    }
  }
}
## IMDB DATA
# recreate the imdb index with the lowercase/stop/stemmer custom analyzer
DELETE imdb

PUT imdb
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop", "stemmer"]
        }
      }
    }
  }
}

# movies mapping: release_year as a yyyy date, plot analyzed with my_custom_analyzer
PUT imdb/_mapping/movies
{
  "properties": {
    "release_year": { "type": "date", "format": "yyyy" },
    "plot": { "type": "string", "analyzer": "my_custom_analyzer" }
  }
}

GET imdb/_count

# look a movie up by title
GET imdb/_search
{
  "query": {
    "match": { "simple_name": "The Waking Life" }
  }
}
# Highlighter
# match on plot, keep only movies with more than 10000 votes,
# and request highlighting for the plot field
GET imdb/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": { "plot": "sun explode spaceship" }
        }
      ],
      "filter": [
        {
          "range": {
            "num_votes": { "gt": 10000 }
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": { "plot": {} }
  }
}
# more like this
# the two documents used as "like" examples in the query below
GET imdb/movies/AVlrarz7VMWvHJXxP15Q

GET imdb/movies/AVlrasPoVMWvHJXxP5RW

# movies whose plot resembles both documents, restricted to more than 10000 votes;
# _source limits the fields returned for each hit
GET imdb/_search
{
  "_source": ["simple_name", "release_year", "tag_line", "genres", "rating"],
  "query": {
    "bool": {
      "must": [
        {
          "more_like_this": {
            "fields": ["plot"],
            "like": [
              { "_index": "imdb", "_type": "movies", "_id": "AVlrarz7VMWvHJXxP15Q" },
              { "_index": "imdb", "_type": "movies", "_id": "AVlrasPoVMWvHJXxP5RW" }
            ]
          }
        }
      ],
      "filter": [
        {
          "range": {
            "num_votes": { "gt": 10000 }
          }
        }
      ]
    }
  }
}
# Suggester
# failed match query
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": "koyaniskatsi"
    }
  }
}

# fuzzy query
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": { "query": "koyaniskatsi", "fuzziness": 2 }
    }
  }
}

# another way - suggester
# same match query, plus a term suggestion computed on simple_name
GET imdb/_search
{
  "query": {
    "match": { "simple_name": "koyaniskatsi" }
  },
  "suggest": {
    "my-suggestion": {
      "text": "koyaniskatsi",
      "term": { "field": "simple_name" }
    }
  }
}

# correct query
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": { "query": "koyaanisqatsi" }
    }
  }
}
# query time search as you type
# prefix-match the typed text against simple_name, most-voted movies first
# NOTE(review): _source returns "name" while the query targets "simple_name" —
# confirm the documents really have a "name" field
GET imdb/_search
{
  "_source": "name",
  "query": {
    "match_phrase_prefix": { "simple_name": "In" }
  },
  "sort": [
    {
      "num_votes": { "order": "desc" }
    }
  ]
}

# This can also be done at index time with ngram tokenizers
POST _analyze
{
  "tokenizer": "ngram",
  "text": "Inception"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment