Skip to content

Instantly share code, notes, and snippets.

@piwii
Created June 28, 2016 11:57
Show Gist options
  • Save piwii/544c33253be47df7d99f0203e5e5d91b to your computer and use it in GitHub Desktop.
Save piwii/544c33253be47df7d99f0203e5e5d91b to your computer and use it in GitHub Desktop.
Explanation of Elasticsearch analysis with different filters
# First 30 timeline events (from offset 0), sorted by last_activity descending,
# restricted to the listed author UUIDs and to the news/advert/event namespaces.
# A terms query takes a flat array of exact values for the field, so the UUIDs
# are plain strings rather than {"uuid": ...} wrapper objects.
# NOTE(review): if author_uuid is mapped as an object with a "uuid" sub-field,
# the field name should be "author_uuid.uuid" instead - confirm against the mapping.
GET /timeline/event/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "author_uuid": [
              "c6c99520-1d8b-45d5-a851-2bc159817af6",
              "3dcfb094-6ac8-4295-af0c-9dd8d744293c"
            ]
          }
        },
        {
          "terms": {
            "namespace": [
              "news",
              "advert",
              "event"
            ]
          }
        }
      ]
    }
  },
  "sort": {
    "last_activity": "desc"
  },
  "size": 30,
  "from": 0
}
# Up to 120 timeline events with no filtering, sorted by last_activity descending.
GET /timeline/event/_search
{
  "query": { "match_all": {} },
  "sort": { "last_activity": "desc" },
  "size": 120
}
# STANDARD ANALYZER
# Uses the index-less _analyze endpoint: my_index is only created by the PUT
# that follows, and the built-in standard analyzer does not need index settings.
GET /_analyze?analyzer=standard&text="Y a de l'eau dans le gaz MR WI-FI."
# ELISION FILTER
# Create my_index with a custom elision filter (explicit article list) and a
# custom analyzer "my_analizer" that applies it after the standard tokenizer.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_filter_elision": {
          "type": "elision",
          "articles": ["l", "m", "t", "qu", "n", "s", "j", "d"]
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["my_filter_elision"]
        }
      }
    }
  }
}

# Run the sample sentence through my_analizer to inspect the resulting tokens.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot."

# Remove the index so the next section can recreate it with other settings.
DELETE /my_index
# LOWERCASE FILTER
# Recreate my_index; my_analizer now chains the built-in lowercase and elision
# filters (in that order) after the standard tokenizer.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision"]
        }
      }
    }
  }
}

# Inspect the tokens produced for the sample sentence.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR WI-FI."

# Remove the index before the next section.
DELETE /my_index
# STOP WORD FILTER
# Recreate my_index with a stop filter based on the predefined _french_ list,
# applied after lowercase and elision.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision", "my_stop_word"]
        }
      }
    }
  }
}

# Inspect the tokens produced for the sample sentence.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR WI-FI."

# Remove the index before the next section.
DELETE /my_index
# WORD DELIMITER FILTER
# Recreate my_index with word_delimiter appended at the END of the chain, i.e.
# after lowercase has already run. The following "GOOD ORDER" section reorders
# the same filters.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "elision", "my_stop_word", "word_delimiter"]
        }
      }
    }
  }
}

# Inspect the tokens produced for a sentence containing the mixed-case word "PowerShot".
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot."

# Remove the index before the next section.
DELETE /my_index
# WORD DELIMITER FILTER (GOOD ORDER)
# Same filters as the previous section, reordered so that word_delimiter runs
# before lowercase: stop words, elision, word_delimiter, then lowercase.
# NOTE(review): the stop filter now runs before lowercase and before elision, so
# capitalised or elided stop words are presumably not removed unless the filter
# is configured with ignore_case - confirm this is intended.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["my_stop_word", "elision", "word_delimiter", "lowercase"]
        }
      }
    }
  }
}

# Inspect the tokens produced for the sample sentence.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot chevaux."

# Remove the index before the next section.
DELETE /my_index
# SNOWBALL FILTER
# Recreate my_index, adding a snowball filter configured for French as the last
# step of the my_analizer chain.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball"
          ]
        }
      }
    }
  }
}

# Inspect the tokens produced for a sentence with plural and adverb forms.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de maisons méchamment l'eau dans le gaz MR PowerShot chevaux."

# Remove the index before the next section.
DELETE /my_index
# ASCIIFOLDING FILTER
# Recreate my_index, adding the built-in asciifolding filter as the last step of
# the my_analizer chain. Only the custom filters that the analyzer actually
# references (my_stop_word, my_snowball) are declared here.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball",
            "asciifolding"
          ]
        }
      }
    }
  }
}

# Inspect the tokens produced for a sentence containing accented characters.
GET /my_index/_analyze?analyzer=my_analizer&text="Il faut être comme sera on est mais ne pas être ce que nous fûmes"

# Remove the index before the next section.
DELETE /my_index
# EDGE NGRAM FILTER
# Recreate my_index, adding an edge_ngram filter (grams of 2 to 10 characters)
# as the last step of the my_analizer chain. No DELETE follows this section:
# the index is reused by the requests below.
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "my_stop_word": {
          "type": "stop",
          "stopwords": "_french_"
        },
        "my_snowball": {
          "type": "snowball",
          "language": "French"
        },
        "my_edge_ngram": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10
        }
      },
      "analyzer": {
        "my_analizer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_stop_word",
            "elision",
            "word_delimiter",
            "lowercase",
            "my_snowball",
            "asciifolding",
            "my_edge_ngram"
          ]
        }
      }
    }
  }
}

# Inspect the tokens produced for the sample sentence.
GET /my_index/_analyze?analyzer=my_analizer&text="Y a de l'eau dans le gaz MR PowerShot chevaux."
# INDEX DATA
# Declare the mapping for my_type: the "name" field is a string analyzed with
# my_analizer.
PUT /my_index/_mapping/my_type
{
  "my_type": {
    "properties": {
      "name": { "type": "string", "analyzer": "my_analizer" }
    }
  }
}
# Bulk-index five sample sentences with explicit ids 1 to 5. Each action line
# is followed by its document on a single line, as the bulk format requires.
POST /my_index/my_type/_bulk
{ "index": { "_id": 1 } }
{ "name": "Mes amis les chevaux" }
{ "index": { "_id": 2 } }
{ "name": "Nous étions bonnes quand on étais jeunes" }
{ "index": { "_id": 3 } }
{ "name": "Guillaume est amoureux de game of throne et d'un cheval" }
{ "index": { "_id": 4 } }
{ "name": "Maxime est un petit jeunot mais son esprit est bon" }
{ "index": { "_id": 5 } }
{ "name": "Il faut être comme on est mais ne pas être ce que nous fûmes" }
# List the documents of my_type with a match_all query.
GET /my_index/my_type/_search
{
  "query": { "match_all": {} }
}

# Show the tokens my_analizer produces for the sentence indexed as document 2.
GET /my_index/_analyze?analyzer=my_analizer&text="Nous étions bonnes quand on étais jeunes"
# QUERY DATA
# Match query for "jeunot" on the "name" field; the request does not override
# the analyzer.
GET /my_index/my_type/_search
{
  "query": {
    "match": { "name": "jeunot" }
  }
}
# QUERY DATA WITH STANDARD ANALYZER
# Match query for "jeun" on the "name" field, with the query text analyzed by
# the standard analyzer.
GET /my_index/my_type/_search
{
  "query": {
    "match": {
      "name": { "query": "jeun", "analyzer": "standard" }
    }
  }
}
# QUERY DATA WITH STANDARD ANALYZER (EXPLAIN)
# Match query for "cheval" on the "name" field using the standard analyzer;
# the explain URL parameter is set on the search request.
GET /my_index/my_type/_search?explain
{
  "query": {
    "match": {
      "name": { "query": "cheval", "analyzer": "standard" }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment