Skip to content

Instantly share code, notes, and snippets.

@liemle3893
Last active January 1, 2018 19:40
Show Gist options
  • Save liemle3893/72429cf618e16404eea2e2c8aaaf86e0 to your computer and use it in GitHub Desktop.
Save liemle3893/72429cf618e16404eea2e2c8aaaf86e0 to your computer and use it in GitHub Desktop.
Elasticsearch analyzers for autocomplete search
  1. Remember to install the 'analysis-icu' plugin first.
  2. Best practice: always use an alias (as you can see, the index name in my silly settings ends with 2, which means I already had a version 1 =))
  3. For cases where you need to support emoji such as ❤️, simply preprocess them into text such as :heart: (use a third-party library or Elasticsearch's mapping char filter).
    1. As of 2018 (actually since Dec 2016), Elasticsearch provides a plug-in for this feature.

analyser.json

{
  "360live_ext_2": {
    "settings": {
      "index": {
        "provided_name": "360live_ext_2",
        "uuid": "ajv_R7v3TW6YQDfQCl7PqQ",
        "creation_date": "1491073195992",
        "version": { "created": "5020199" },
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "analysis": {
          "char_filter": {
            "colon_to_dash_char_filter": {
              "type": "mapping",
              "mappings": [": => _"]
            }
          },
          "tokenizer": {
            "accent_short": {
              "type": "edge_ngram",
              "min_gram": "1",
              "max_gram": "10",
              "token_chars": ["letter", "digit"]
            },
            "accent_medium": {
              "type": "edge_ngram",
              "min_gram": "1",
              "max_gram": "50",
              "token_chars": ["letter", "digit"]
            }
          },
          "analyzer": {
            "accent": {
              "tokenizer": "accent_short",
              "filter": ["lowercase"]
            },
            "accent_medium": {
              "tokenizer": "accent_medium",
              "filter": ["lowercase"]
            },
            "accent_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "icu_tokenizer",
              "filter": ["icu_normalizer"]
            },
            "autocomplete_search": {
              "tokenizer": "whitespace",
              "filter": ["lowercase"]
            },
            "match_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "keyword",
              "filter": ["lowercase"]
            },
            "strip_accent": {
              "tokenizer": "accent_short",
              "filter": ["icu_normalizer", "icu_folding"]
            },
            "strip_accent_term": {
              "char_filter": ["colon_to_dash_char_filter"],
              "tokenizer": "icu_tokenizer",
              "filter": ["icu_normalizer", "icu_folding"]
            }
          }
        }
      }
    }
  }
}

mappings.json

{
  "360live_ext_2": {
    "mappings": {
      "user": {
        "properties": {
          "id": {
            "type": "text",
            "analyzer": "accent_medium",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "key": { "type": "text", "analyzer": "match_term" }
            }
          },
          "username": {
            "type": "text",
            "analyzer": "accent_medium",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "key": { "type": "text", "analyzer": "match_term" }
            }
          },
          "displayName": {
            "type": "text",
            "analyzer": "match_term",
            "search_analyzer": "autocomplete_search",
            "fields": {
              "accent": {
                "type": "text",
                "analyzer": "accent",
                "search_analyzer": "autocomplete_search"
              },
              "accent_term": {
                "type": "text",
                "analyzer": "accent_term",
                "search_analyzer": "autocomplete_search"
              },
              "strip_accent": {
                "type": "text",
                "analyzer": "strip_accent",
                "search_analyzer": "autocomplete_search"
              },
              "strip_accent_term": {
                "type": "text",
                "analyzer": "strip_accent_term",
                "search_analyzer": "autocomplete_search"
              }
            }
          }
        }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment