Skip to content

Instantly share code, notes, and snippets.

@ptgamr
Forked from avar/ngram-search.sh
Created May 5, 2016 13:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ptgamr/9bf3fc90f12abe242ef1bb285503f831 to your computer and use it in GitHub Desktop.
Save ptgamr/9bf3fc90f12abe242ef1bb285503f831 to your computer and use it in GitHub Desktop.
ElasticSearch fuzzy ngram powered search
# Blank out the proxy variables for every curl call made by this script
# (presumably so that requests to localhost are not routed via a proxy).
export http_proxy=
export https_proxy=
# Delete the "test" index so the script starts from a clean slate. The
# trailing echo ends the response line, as every other curl call here does,
# so the reply does not run into the next status message.
curl -XDELETE 'http://localhost:9200/test/'; echo
# PUT the definition of the "test" index: mappings for the "member" type plus
# the custom analysis chain those mappings refer to.
#
# Mappings (both fields are multi_field):
#   person_name -> person_name              (analyzer: full_name)
#                  partial_person_name      (ngrams_for_cjk at index and search time)
#   city_name   -> city_name                (analyzer: full_name)
#                  partial_city_name        (ngrams_for_cjk at index and search time)
#                  partial_city_name_prefix (prefix_ngrams_for_cjk at index and search time)
#
# Analyzers (all use the standard tokenizer):
#   full_name             : standard, lowercase, asciifolding filters
#   ngrams_for_cjk        : lowercase, then nGram of 2..10 characters
#   prefix_ngrams_for_cjk : lowercase, then front-side edgeNGram of 2..10 characters
printf '%s\n' 'Creating the mapping'
curl --request PUT --data '
{
"mappings" : {
"member" : {
"properties" : {
"person_name" : {
"fields" : {
"partial_person_name" : {
"search_analyzer" : "ngrams_for_cjk",
"index_analyzer" : "ngrams_for_cjk",
"type" : "string"
},
"person_name" : {
"type" : "string",
"analyzer" : "full_name"
}
},
"type" : "multi_field"
},
"city_name" : {
"fields" : {
"partial_city_name" : {
"search_analyzer" : "ngrams_for_cjk",
"index_analyzer" : "ngrams_for_cjk",
"type" : "string"
},
"partial_city_name_prefix" : {
"search_analyzer" : "prefix_ngrams_for_cjk",
"index_analyzer" : "prefix_ngrams_for_cjk",
"type" : "string"
},
"city_name" : {
"type" : "string",
"analyzer" : "full_name"
}
},
"type" : "multi_field"
}
}
}
},
"settings" : {
"analysis" : {
"filter" : {
"ngrams_for_every_few_characters" : {
"type" : "nGram",
"max_gram": "10",
"min_gram" : "2"
},
"ngrams_for_prefix" : {
"type" : "edgeNGram",
"max_gram": "10",
"min_gram" : "2",
"side" : "front"
}
},
"analyzer" : {
"full_name" : {
"filter" : [
"standard",
"lowercase",
"asciifolding"
],
"type" : "custom",
"tokenizer" : "standard"
},
"ngrams_for_cjk" : {
"filter" : [
"lowercase",
"ngrams_for_every_few_characters"
],
"type" : "custom",
"tokenizer" : "standard"
},
"prefix_ngrams_for_cjk" : {
"filter" : [
"lowercase",
"ngrams_for_prefix"
],
"type" : "custom",
"tokenizer" : "standard"
}
}
}
}
}
' 'http://localhost:9200/test/?pretty=1'
# Blank line separator after the response body.
echo
# Load six sample "member" documents into the "test" index with a single
# _bulk request. The payload alternates an action line ({"index": ...}) with
# the document it applies to; names and cities mix plain ASCII, accented
# Latin and Arabic text.
printf '%s\n' 'Indexing'
curl --request POST --data '
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "John Smith", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "Amsterdam"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Lucy Lue", "city_name" : "London"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Arnfjörð Bjarmason", "city_name" : "كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Ævar Johnson", "city_name" : "The city of كوالالمبور"}
{"index" : {"_index" : "test", "_type" : "member"}}
{"person_name" : "Juhen Smith", "city_name" : "Amsterdam"}
' 'http://localhost:9200/_bulk?pretty=1'
# Blank line separator after the response body.
echo
# Issue a _refresh (no index is named in the URL) before searching, so the
# script waits for the indexing step instead of querying straight away.
printf '%s\n' 'Refreshing'
curl --request POST 'http://localhost:9200/_refresh?pretty=yes'
echo
# Search the "member" documents of the "test" index.
#
# Query shape: a bool query with two "should" clauses and
# minimum_number_should_match = 2, i.e. both clauses have to match.
#   1. person name: dis_max over the full person_name field (boost 10) and
#      the n-gram field partial_person_name.
#   2. city name:   dis_max over the full city_name field (boost 5) and the
#      n-gram field partial_city_name.
# The search terms ("Xvar", and the Arabic city name with "FUZZY"/"WOO"
# spliced in) do not equal any indexed value exactly; presumably this is
# meant to show that the n-gram fields still produce hits.
#
# "explain" is sent as a JSON boolean rather than the string "false", so the
# request does not depend on lenient string-to-boolean coercion.
#
# NOTE(review): the "text" query was renamed to "match" in later
# Elasticsearch releases - confirm against the target server version.
echo "Doing the search"
curl -XGET 'http://localhost:9200/test/member/_search?pretty=1' -d '
{
"explain" : false,
"query" : {
"bool" : {
"minimum_number_should_match" : 2,
"should" : [
{
"dis_max" : {
"queries" : [
{
"bool" : {
"boost" : 10,
"must" : {
"text" : {
"person_name.person_name" : "Xvar"
}
}
}
},
{
"text" : {
"person_name.partial_person_name" : "Xvar"
}
}
]
}
},
{
"dis_max" : {
"queries" : [
{
"bool" : {
"boost" : 5,
"must" : {
"text" : {
"city_name.city_name" : "كوالFUZZYالمبورWOO"
}
}
}
},
{
"text" : {
"city_name.partial_city_name" : "كوالFUZZYالمبورWOO"
}
}
]
}
}
]
}
}
}
'; echo
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment