- Remember to install the `analysis-icu` plugin first.
- Best practice: always use an alias (as you can see, my stupid settings have `2` at the end of the index name, which means I already had `1` =)).
- For cases where you need to support emoticons like ❤️, simply preprocess them into `:heart:` (use a third-party library or Elasticsearch's mapping char filter).
- As of 2018 (actually since Dec 2016), Elasticsearch provides a plugin for this feature.
analyser.json
{
  "360live_ext_2": {
    "settings": {
      "index": {
        "provided_name": "360live_ext_2",
        "uuid": "ajv_R7v3TW6YQDfQCl7PqQ",
        "creation_date": "1491073195992",
        "version": {
          "created": "5020199"
        },
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "analysis": {
          "char_filter": {
            "colon_to_dash_char_filter": {
              "type": "mapping",
              "mappings": [": => _"]
            }
          },
          "tokenizer": {
            "accent_short": {
              "type": "edge_ngram",
              "min_gram": "1",
              "max_gram": "10",
              "token_chars": ["letter", "digit"]
            },
            "accent_medium": {
              "type": "edge_ngram",
              "min_gram": "1",
              "max_gram": "50",
              "token_chars": ["letter", "digit"]
            }
          },
          "analyzer": {
            "match_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "keyword",
              "filter": ["lowercase"]
            },
            "autocomplete_search": {
              "tokenizer": "whitespace",
              "filter": ["lowercase"]
            },
            "accent": {
              "tokenizer": "accent_short",
              "filter": ["lowercase"]
            },
            "accent_medium": {
              "tokenizer": "accent_medium",
              "filter": ["lowercase"]
            },
            "accent_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "icu_tokenizer",
              "filter": ["icu_normalizer"]
            },
            "strip_accent": {
              "tokenizer": "accent_short",
              "filter": ["icu_normalizer", "icu_folding"]
            },
            "strip_accent_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "icu_tokenizer",
              "filter": ["icu_normalizer", "icu_folding"]
            }
          }
        }
      }
    }
  }
}
mappings.json
{
  "360live_ext_2": {
    "mappings": {
      "user": {
        "properties": {
          "displayName": {
            "type": "text",
            "analyzer": "match_term",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "accent": {
                "type": "text",
                "analyzer": "accent",
                "search_analyzer": "autocomplete_search"
              },
              "accent_term": {
                "type": "text",
                "analyzer": "accent_term",
                "search_analyzer": "autocomplete_search"
              },
              "strip_accent": {
                "type": "text",
                "analyzer": "strip_accent",
                "search_analyzer": "autocomplete_search"
              },
              "strip_accent_term": {
                "type": "text",
                "analyzer": "strip_accent_term",
                "search_analyzer": "autocomplete_search"
              }
            }
          },
          "id": {
            "type": "text",
            "analyzer": "accent_medium",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "key": {
                "type": "text",
                "analyzer": "match_term"
              }
            }
          },
          "username": {
            "type": "text",
            "analyzer": "accent_medium",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "key": {
                "type": "text",
                "analyzer": "match_term"
              }
            }
          }
        }
      }
    }
  }
}