Skip to content

Instantly share code, notes, and snippets.

@jsvd
Created January 6, 2017 10:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsvd/cafccdcf20bd30969ed8419c8ae9a573 to your computer and use it in GitHub Desktop.
Save jsvd/cafccdcf20bd30969ed8419c8ae9a573 to your computer and use it in GitHub Desktop.
Demo code for a presentation about Elasticsearch by João Duarte at the Elastic Meetup Lisboa, 2017-01-05
## Document Analysis
# Candidates to swap into the requests below: standard, letter, whitespace, ngram

# Run the sample text through a bare tokenizer (no index required).
POST _analyze
{
  "tokenizer": "ngram",
  "text": "Filipe's shoes, yo."
}

# Run the same sample text through a complete analyzer.
POST _analyze
{
  "analyzer": "standard",
  "text": "Filipe's shoes, yo."
}
# Reset helper: drop my_index so that one of the PUT my_index variants below can be (re)run.
DELETE my_index

# Probe the index-level analyzer. my_custom_analyzer is defined only by the
# PUT my_index requests further down, so run one of those before this request.
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "He who controls the spice, controls the Universe."
}
# Variant 1 - minimal custom analyzer: standard tokenizer, no char filters, no token filters.
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": []
        }
      }
    }
  }
}
# Variant 2 - custom analyzer that lowercases tokens and then removes stop words.
PUT my_index/
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop"]
        }
      }
    }
  }
}
# Variant 3 - as variant 2, with a stemmer appended as the last token filter.
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop", "stemmer"]
        }
      }
    }
  }
}
# Variant 4 - custom analyzer with char filters.
# Char filters run on the raw text before tokenization:
#   html_strip - built-in, removes HTML markup
#   emoticons  - custom "mapping" char filter defined below, rewrites :) and :(
# Token filters run in the listed order. "stop" is placed before "stemmer"
# (same order as the other analyzers in this file) so that stop words are
# matched in their original form, before the stemmer can rewrite them.
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [
            "html_strip",
            "emoticons"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "stemmer"
          ]
        }
      },
      "char_filter": {
        "emoticons": {
          "type": "mapping",
          "mappings": [
            ":) => _happy_",
            ":( => _sad_"
          ]
        }
      }
    }
  }
}
# Exercise the analyzer with input that contains HTML markup and an emoticon.
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "<p>He who controls the spice, controls the universe :)</p>"
}

# Show the settings currently applied to the index.
GET my_index/_settings
# Indexing
# Analyze each of the three sample sentences to see which terms
# would end up in the inverted index.

# Sentence 1
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "He who controls the spice, controls the universe."
}

# Sentence 2
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "A mad man sees what he sees."
}

# Sentence 3
POST my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "What if a mad man controlled the universe?"
}
# Inspect the current mapping of the index.
GET my_index/_mapping

# Map the "text" field of type my_type so it is analyzed with my_custom_analyzer.
PUT my_index/_mapping/my_type
{
  "properties": {
    "text": {
      "type": "string",
      "analyzer": "my_custom_analyzer"
    }
  }
}

## reminder: first create index w/ analyzer and add mapping
# Index the three sample documents.
# The _type must match the type the mapping above was put on (my_type).
# Bulk bodies are newline-delimited JSON: keep each action/document on one line.
POST _bulk
{ "index" : { "_index" : "my_index", "_type" : "my_type" } }
{ "text": "He who controls the spice, controls the universe." }
{ "index" : { "_index" : "my_index", "_type" : "my_type" } }
{ "text": "A mad man sees what he sees." }
{ "index" : { "_index" : "my_index", "_type" : "my_type" } }
{ "text": "What if a mad man controlled the universe?" }
# List the indexed documents (search with no query).
GET my_index/_search

# URI search for the term "Mad".
GET my_index/_search?q=Mad

# Overview of the indices in the cluster.
GET _cat/indices
# match query on the "text" field; ?explain adds a score explanation to each hit.
GET my_index/_search?explain
{
  "query": {
    "match": {
      "text": {
        "query": "controllable spice"
      }
    }
  }
}

# match_phrase query on the "text" field with a slop of 1.
GET my_index/_search
{
  "query": {
    "match_phrase": {
      "text": {
        "query": "control spice",
        "slop": 1
      }
    }
  }
}
# Query-time analyzer: compare the results of "standard" vs "my_custom_analyzer".
# Swap the value of "analyzer" between those two names; it must be the name of
# an analyzer that actually exists, otherwise the request is rejected.
GET my_index/_search
{
  "query": {
    "match": {
      "text": {
        "query": "universal",
        "analyzer": "my_custom_analyzer"
      }
    }
  }
}
# Peek at the documents in the news index.
GET news/_search

# Scoring: match several names against the title field.
GET news/_search
{
  "query": {
    "match": {
      "title": "donald trump jimmy fallon hillary clinton"
    }
  }
}
# Relevancy boosting (a clause without an explicit boost uses the default of 1).
# Title matches are boosted to 2, content matches keep the default,
# and the filter clause restricts hits to media-type "news".
GET news/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "title": { "query": "best movies netflix", "boost": 2 } } },
        { "match": { "content": { "query": "best movies netflix" } } }
      ],
      "filter": [
        {
          "term": {
            "media-type": "news"
          }
        }
      ]
    }
  }
}
# News source boosting.
# must:   the text has to match in title or content (inner bool/should).
# should: optional phrase matches on "source"; "Tech Investor News" is
#         boosted to 2, "TechRadar" keeps the default boost.
GET news/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              { "match": { "title": { "query": "best movies netflix" } } },
              { "match": { "content": { "query": "best movies netflix" } } }
            ]
          }
        }
      ],
      "should": [
        { "match_phrase": { "source": { "query": "Tech Investor News", "boost": 2 } } },
        { "match_phrase": { "source": { "query": "TechRadar" } } }
      ]
    }
  }
}
## IMDB DATA
# Start from a clean slate.
DELETE imdb

# Create the imdb index with the lowercase + stop + stemmer analyzer.
PUT imdb
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop", "stemmer"]
        }
      }
    }
  }
}

# Mapping for type "movies": release_year is a date in yyyy format,
# plot is analyzed with my_custom_analyzer.
PUT imdb/_mapping/movies
{
  "properties": {
    "release_year": { "type": "date", "format": "yyyy" },
    "plot": { "type": "string", "analyzer": "my_custom_analyzer" }
  }
}
# How many documents are in the imdb index?
GET imdb/_count

# Look a movie up by title.
GET imdb/_search
{
  "query": {
    "match": { "simple_name": "The Waking Life" }
  }
}
# Highlighter
# Match on plot, keep only movies with more than 10000 votes,
# and request highlighting for the plot field.
GET imdb/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "plot": "sun explode spaceship"
          }
        }
      ],
      "filter": [
        {
          "range": {
            "num_votes": {
              "gt": 10000
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "plot": {}
    }
  }
}
# more like this
# Fetch the two documents that are used as "like" examples below.
GET imdb/movies/AVlrarz7VMWvHJXxP15Q
GET imdb/movies/AVlrasPoVMWvHJXxP5RW

# Find movies whose plot resembles those two documents, limited to movies
# with more than 10000 votes; only the listed _source fields are returned.
GET imdb/_search
{
  "_source": ["simple_name", "release_year", "tag_line", "genres", "rating"],
  "query": {
    "bool": {
      "must": [
        {
          "more_like_this": {
            "fields": ["plot"],
            "like": [
              { "_index": "imdb", "_type": "movies", "_id": "AVlrarz7VMWvHJXxP15Q" },
              { "_index": "imdb", "_type": "movies", "_id": "AVlrasPoVMWvHJXxP5RW" }
            ]
          }
        }
      ],
      "filter": [
        { "range": { "num_votes": { "gt": 10000 } } }
      ]
    }
  }
}
# Suggester
# Step 1 - plain match query with a misspelled title (the "failed" case).
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": "koyaniskatsi"
    }
  }
}

# Step 2 - same misspelling, this time as a fuzzy match with fuzziness 2.
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": { "query": "koyaniskatsi", "fuzziness": 2 }
    }
  }
}
# Step 3 - another way: keep the plain match query and add a term suggester
# on simple_name for the misspelled text.
GET imdb/_search
{
  "query": {
    "match": { "simple_name": "koyaniskatsi" }
  },
  "suggest": {
    "my-suggestion": {
      "text": "koyaniskatsi",
      "term": { "field": "simple_name" }
    }
  }
}

# Step 4 - the query with the correctly spelled title.
GET imdb/_search
{
  "query": {
    "match": {
      "simple_name": { "query": "koyaanisqatsi" }
    }
  }
}
# Query-time search-as-you-type: prefix match on the title, most-voted movies first.
# NOTE(review): _source asks for "name" while every other request in this file
# uses "simple_name" - confirm that a "name" field exists in the imdb documents.
GET imdb/_search
{
  "_source": "name",
  "query": {
    "match_phrase_prefix": { "simple_name": "In" }
  },
  "sort": [
    { "num_votes": { "order": "desc" } }
  ]
}
# This can also be done at index time with ngram tokenizers.
# Show the tokens the ngram tokenizer emits for a sample title.
POST _analyze
{
  "tokenizer": "ngram",
  "text": "Inception"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment