Skip to content

Instantly share code, notes, and snippets.

@binhqd
Last active April 29, 2019 09:22
Show Gist options
  • Save binhqd/874808d6e4e24431d3ad3facfaeb7438 to your computer and use it in GitHub Desktop.
Save binhqd/874808d6e4e24431d3ad3facfaeb7438 to your computer and use it in GitHub Desktop.
Elasticsearch settings & mapping

PUT /objects

{
  "settings": {
    "similarity": {
      "my_bm25": { "type": "BM25", "b": 0 }
    },
    "index": {
      "analysis": {
        "analyzer": {
          "akt_analyzer_exact_en": {
            "tokenizer": "keyword",
            "filter": ["lowercase", "asciifolding", "trim"]
          },
          "akt_analyzer_standard_en": {
            "tokenizer": "standard",
            "filter": ["synonym_en", "spell_en_US", "lowercase", "asciifolding", "trim"]
          },
          "akt_analyzer_pattern_en": {
            "tokenizer": "akt_pattern_tokernizer_en",
            "filter": ["synonym_en", "spell_en_US", "lowercase", "asciifolding", "trim"]
          },
          "akt_potential_type_en": {
            "tokenizer": "akt_potential_type_en",
            "filter": ["lowercase", "asciifolding", "trim"]
          },
          "romaji_analyzer": {
            "tokenizer": "kuromoji_tokenizer",
            "filter": ["romaji_readingform"]
          },
          "katakana_analyzer": {
            "tokenizer": "kuromoji_tokenizer",
            "filter": ["katakana_readingform"]
          },
          "akt_analyzer_exact_ja": {
            "tokenizer": "keyword",
            "filter": ["trim"]
          },
          "akt_analyzer_standard_ja": {
            "tokenizer": "kuromoji_tokenizer",
            "filter": ["trim", "katakana_readingform"]
          },
          "akt_analyzer_pattern_ja": {
            "tokenizer": "akt_pattern_tokernizer_ja",
            "filter": ["trim"]
          },
          "akt_potential_type_ja": {
            "tokenizer": "akt_potential_type_ja",
            "filter": ["trim"]
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 5,
            "token_chars": ["letter"]
          },
          "akt_pattern_tokernizer_en": {
            "type": "pattern",
            "pattern": "(hotel|hostel|astotel|motel|restaurant|bar|museum|cafe|café|coffee|bakery|baker|pizzeria|pizza|^bake|bake$|cake|cuisine|food|bridge|stadium|mountain|beach|park|chocolat|salon|kitchen|boutique|resort|villa|cathedral|church|bank|airport|garden|spa$|^spa|^opera|opera$|station|studio|dessert|cook|grill|steak| |,|-|_|&)",
            "flags": "CASE_INSENSITIVE"
          },
          "akt_potential_type_en": {
            "type": "pattern",
            "pattern": "(hotel|hostel|astotel|motel|restaurant|bar|museum|cafe|café|coffee|bakery|baker|pizzeria|pizza|^bake|bake$|cake|cuisine|food|bridge|stadium|mountain|beach|park|chocolat|salon|kitchen|boutique|resort|villa|cathedral|church|bank|airport|garden|spa$|^spa|^opera|opera$|station|studio|dessert|cook|grill|steak)",
            "flags": "CASE_INSENSITIVE",
            "group": 1
          },
          "akt_pattern_tokernizer_ja": {
            "type": "pattern",
            "pattern": "(hotel|hostel|astotel|motel|restaurant|bar|museum|cafe|café|coffee|bakery|baker|pizzeria|pizza|^bake|bake$|cake|cuisine|food|bridge|stadium|mountain|beach|park|chocolat|salon|kitchen|boutique|resort|villa|cathedral|church|bank|airport|garden|spa$|^spa|^opera|opera$|station|studio|dessert|cook|grill|steak| |,|-|_|&)",
            "flags": "CASE_INSENSITIVE"
          },
          "akt_potential_type_ja": {
            "type": "pattern",
            "pattern": "(hotel|hostel|astotel|motel|restaurant|bar|museum|cafe|café|coffee|bakery|baker|pizzeria|pizza|^bake|bake$|cake|cuisine|food|bridge|stadium|mountain|beach|park|chocolat|salon|kitchen|boutique|resort|villa|cathedral|church|bank|airport|garden|spa$|^spa|^opera|opera$|station|studio|dessert|cook|grill|steak)",
            "flags": "CASE_INSENSITIVE",
            "group": 1
          }
        },
        "filter": {
          "romaji_readingform": { "type": "kuromoji_readingform", "use_romaji": true },
          "katakana_readingform": { "type": "kuromoji_readingform", "use_romaji": false },
          "synonym_en": { "type": "synonym", "synonyms_path": "analysis/en_synonyms.txt" },
          "spell_en_US": { "type": "hunspell", "language": "en_US" }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "name_en": {
          "type": "text",
          "analyzer": "akt_analyzer_exact_en",
          "fields": {
            "standard_tokenizer": {
              "type": "text",
              "similarity": "my_bm25",
              "analyzer": "akt_analyzer_standard_en",
              "fields": {
                "length": { "type": "token_count", "analyzer": "akt_analyzer_standard_en" }
              }
            },
            "pattern_tokenizer": {
              "type": "text",
              "similarity": "my_bm25",
              "analyzer": "akt_analyzer_pattern_en",
              "fields": {
                "length": { "type": "token_count", "analyzer": "akt_analyzer_pattern_en" }
              }
            },
            "potential_types": {
              "type": "text",
              "analyzer": "akt_potential_type_en",
              "fielddata": true
            }
          }
        },
        "name_ja": {
          "type": "text",
          "analyzer": "akt_analyzer_exact_ja",
          "fields": {
            "standard_tokenizer": {
              "type": "text",
              "similarity": "my_bm25",
              "analyzer": "akt_analyzer_standard_ja",
              "fields": {
                "length": { "type": "token_count", "analyzer": "akt_analyzer_standard_ja" }
              }
            },
            "pattern_tokenizer": {
              "type": "text",
              "similarity": "my_bm25",
              "analyzer": "akt_analyzer_pattern_ja",
              "fields": {
                "length": { "type": "token_count", "analyzer": "akt_analyzer_pattern_ja" }
              }
            },
            "potential_types": {
              "type": "text",
              "analyzer": "akt_potential_type_ja",
              "fielddata": true
            }
          }
        },
        "country": { "type": "text" },
        "city": { "type": "text" },
        "type": { "type": "text" },
        "aka": {
          "type": "nested",
          "properties": {
            "name_en": {
              "type": "text",
              "analyzer": "akt_analyzer_exact_en",
              "fields": {
                "standard_tokenizer": {
                  "type": "text",
                  "similarity": "my_bm25",
                  "analyzer": "akt_analyzer_standard_en",
                  "fields": {
                    "length": { "type": "token_count", "analyzer": "akt_analyzer_standard_en" }
                  }
                },
                "pattern_tokenizer": {
                  "type": "text",
                  "similarity": "my_bm25",
                  "analyzer": "akt_analyzer_pattern_en",
                  "fields": {
                    "length": { "type": "token_count", "analyzer": "akt_analyzer_pattern_en" }
                  }
                },
                "potential_types": {
                  "type": "text",
                  "analyzer": "akt_potential_type_en",
                  "fielddata": true
                }
              }
            },
            "name_ja": {
              "type": "text",
              "analyzer": "akt_analyzer_exact_ja",
              "fields": {
                "standard_tokenizer": {
                  "type": "text",
                  "similarity": "my_bm25",
                  "analyzer": "akt_analyzer_standard_ja",
                  "fields": {
                    "length": { "type": "token_count", "analyzer": "akt_analyzer_standard_ja" }
                  }
                },
                "pattern_tokenizer": {
                  "type": "text",
                  "similarity": "my_bm25",
                  "analyzer": "akt_analyzer_pattern_ja",
                  "fields": {
                    "length": { "type": "token_count", "analyzer": "akt_analyzer_pattern_ja" }
                  }
                },
                "potential_types": {
                  "type": "text",
                  "analyzer": "akt_potential_type_ja",
                  "fielddata": true
                }
              }
            }
          }
        }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment