Created
June 28, 2016 11:57
-
-
Save piwii/544c33253be47df7d99f0203e5e5d91b to your computer and use it in GitHub Desktop.
Explanation of Elasticsearch analysis with different filters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Latest 30 timeline events written by either of two authors, restricted to
# the news/advert/event namespaces, most recent activity first.
# A `terms` clause takes a flat array of scalar values; wrapping each UUID in
# an object is not a valid value list.
# NOTE(review): assumes `author_uuid` is itself the indexed field holding the
# UUID string. If it is mapped as an object with a `uuid` sub-field, target
# `author_uuid.uuid` instead - confirm against the `timeline` mapping.
GET /timeline/event/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "author_uuid": [
              "c6c99520-1d8b-45d5-a851-2bc159817af6",
              "3dcfb094-6ac8-4295-af0c-9dd8d744293c"
            ]
          }
        },
        {
          "terms": {
            "namespace": ["news", "advert", "event"]
          }
        }
      ]
    }
  },
  "sort": {
    "last_activity": "desc"
  },
  "size": 30,
  "from": 0
}
# Fetch up to 120 timeline events of any kind, most recent activity first.
GET /timeline/event/_search
{
  "query": { "match_all": {} },
  "sort": { "last_activity": "desc" },
  "size": 120
}
# STANDARD ANALYZER
# Baseline: how the built-in standard analyzer tokenizes the sample sentence.
# The index-less endpoint is used because `my_index` is only created by the
# PUT that follows, and a built-in analyzer needs no index settings.
GET /_analyze?analyzer=standard&text="Y a de l'eau dans le gaz MR WI-FI."
# FILTER ELISION
# Custom analyzer: standard tokenizer followed by a single elision filter
# configured with an explicit list of articles.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["my_filter_elision"]
        }
      },
      "filter": {
        "my_filter_elision": {
          "type": "elision",
          "articles": ["l", "m", "t", "qu", "n", "s", "j", "d"]
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# FILTER LOWERCASE
# Custom analyzer: standard tokenizer, then `lowercase`, then the built-in
# `elision` filter (no custom filter definitions in this step).
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision"]
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR WI-FI."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# FILTER STOPWORD
# Adds a `stop` filter using the predefined `_french_` stopword list at the
# end of the chain, after `lowercase` and `elision`.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision", "my_stop_word"]
        }
      },
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR WI-FI."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# FILTER WORD DELIMITER
# Appends `word_delimiter` as the last filter, i.e. after `lowercase`.
# NOTE(review): because tokens are already lowercased when they reach
# `word_delimiter`, it presumably can no longer split `PowerShot` on its case
# change - this looks like the intentionally wrong order; confirm by running.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision", "my_stop_word", "word_delimiter"]
        }
      },
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# FILTER WORD DELIMITER (GOOD ORDER)
# `word_delimiter` now runs before `lowercase`, so it receives tokens such as
# `PowerShot` with their original casing.
# `my_stop_word` also runs before `lowercase` in this chain, so it is given
# `ignore_case: true`; with the default (false), capitalised stopwords such as
# the leading `Y` would not match the lowercase `_french_` list.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_",
          "ignore_case": true
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase"
          ]
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot chevaux."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# SNOWBALL
# Adds a French Snowball stemmer as the last filter, after `lowercase`.
# `my_stop_word` runs before `lowercase` in this chain, so it is given
# `ignore_case: true`; with the default (false), capitalised stopwords such as
# the leading `Y` would not match the lowercase `_french_` list.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_",
          "ignore_case": true
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball"
          ]
        }
      }
    }
  }
}

# Run a sample sentence containing inflected words through the analyzer.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de maisons méchamment l'eau dans le gaz MR PowerShot chevaux."

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# ASCII FOLDING
# Adds `asciifolding` as the last filter, after the Snowball stemmer.
# `my_stop_word` runs before `lowercase` in this chain, so it is given
# `ignore_case: true`; with the default (false), capitalised stopwords such as
# the leading `Il` would not match the lowercase `_french_` list.
# The edge n-gram filter definition was dropped from this step because the
# analyzer below never referenced it.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_",
          "ignore_case": true
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball",
            "asciifolding"
          ]
        }
      }
    }
  }
}

# Run a sample sentence containing accented characters through the analyzer.
GET /my_index/_analyze?analyzer=my_analizer&text="Il faut être comme sera on est mais ne pas être ce que nous fûmes"

# Drop the index so that it can be recreated with different settings.
DELETE /my_index
# EDGE NGRAM
# Adds an edge n-gram filter (2 to 10 characters) as the last filter.
# `my_stop_word` runs before `lowercase` in this chain, so it is given
# `ignore_case: true`; with the default (false), capitalised stopwords such as
# the leading `Y` would not match the lowercase `_french_` list.
# No DELETE follows this step: the requests after it reference
# `my_index` and `my_analizer`.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_",
          "ignore_case": true
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        },
        "my_edge_ngram": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball",
            "asciifolding",
            "my_edge_ngram"
          ]
        }
      }
    }
  }
}

# Run the sample sentence through the analyzer defined above.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot chevaux."
# INDEX DATA
# Declare the `name` field of `my_type` as a string analyzed with
# `my_analizer`.
PUT /my_index/_mapping/my_type
{
  "my_type": {
    "properties": {
      "name": {
        "analyzer": "my_analizer",
        "type": "string"
      }
    }
  }
}
# Bulk-index five sample French sentences with explicit ids 1 to 5.
# Each action line and each document must stay on a single line.
POST /my_index/my_type/_bulk
{"index": {"_id": 1}}
{"name": "Mes amis les chevaux"}
{"index": {"_id": 2}}
{"name": "Nous étions bonnes quand on étais jeunes"}
{"index": {"_id": 3}}
{"name": "Guillaume est amoureux de game of throne et d'un cheval"}
{"index": {"_id": 4}}
{"name": "Maxime est un petit jeunot mais son esprit est bon"}
{"index": {"_id": 5}}
{"name": "Il faut être comme on est mais ne pas être ce que nous fûmes"}
# List every document of `my_type` in `my_index`.
GET /my_index/my_type/_search
{
  "query": { "match_all": {} }
}

# Show the tokens `my_analizer` produces for one of the sample sentences.
GET /my_index/_analyze?analyzer=my_analizer&text="Nous étions bonnes quand on étais jeunes"
# QUERY DATA
# Match query on `name`; no analyzer is specified in the request.
GET /my_index/my_type/_search
{
  "query": {
    "match": { "name": "jeunot" }
  }
}
# QUERY DATA WITH STANDARD ANALYZER
# Same kind of match query, but the query text `jeun` is analyzed with the
# `standard` analyzer named in the request.
GET /my_index/my_type/_search
{
  "query": {
    "match": {
      "name": {
        "analyzer": "standard",
        "query": "jeun"
      }
    }
  }
}
# QUERY DATA WITH STANDARD ANALYZER (WITH EXPLAIN)
# Match query for `cheval` analyzed with `standard`; the `explain` URL flag
# asks for a scoring explanation with each hit.
GET /my_index/my_type/_search?explain
{
  "query": {
    "match": {
      "name": {
        "analyzer": "standard",
        "query": "cheval"
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment