Skip to content

Instantly share code, notes, and snippets.

@jurgynet
Created September 26, 2013 20:43
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jurgynet/6720252 to your computer and use it in GitHub Desktop.
Save jurgynet/6720252 to your computer and use it in GitHub Desktop.
Elasticsearch name testing: standard, asis, phonetic, nickname
#!/bin/sh
# Rebuild the "names-test" index from scratch with the custom name analyzers.

# Base endpoint of the index that is dropped and recreated below.
names_index="localhost:9200/names-test"

# Drop any earlier copy first; otherwise the settings below would not be
# applied to an index that already exists.
curl -XDELETE "${names_index}/"
echo

# Index definition sent verbatim as the request body.
#   analyzers:
#     lowkeyword-analyzer - keyword tokenizer + lowercase filter
#     phonetic-analyzer   - whitespace tokenizer + standard, lowercase and the
#                           double-metaphone "dmeta-filter"; the phonetic
#                           filter type is documented at
#                           https://github.com/elasticsearch/elasticsearch-analysis-phonetic
#     nickname-analyzer   - whitespace tokenizer, limited to one token,
#                           lowercased, then mapped through the synonym file
#                           ../config/analysis/nick2basename.txt
#   mappings:
#     _default_ - enables and stores _timestamp
#     doc       - "name" is a multi_field with sub-fields name, asis,
#                 phonetic and basename (one per analyzer); "date" is a date
index_definition='{
"settings": {
"analysis": {
"analyzer": {
"lowkeyword-analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter" : ["lowercase"]
},
"phonetic-analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter" : ["standard", "lowercase", "dmeta-filter"]
},
"nickname-analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter" : ["onetoken-filter", "lowercase", "nickname-filter"]
}
},
"filter" : {
"dmeta-filter" : {
"type" : "phonetic",
"encoder" : "doublemetaphone",
"replace" : true
},
"onetoken-filter" : {
"type" : "limit",
"max_token_count" : 1
},
"nickname-filter" : {
"type" : "synonym",
"synonyms_path" : "../config/analysis/nick2basename.txt"
}
}
}
},
"mappings": {
"_default_": {
"_timestamp" : {
"enabled" : true,
"store" : true
}
},
"doc": {
"properties": {
"name" : {
"type" : "multi_field",
"fields" : {
"name" : {"type" : "string", "index" : "analyzed",
"index_analyzer": "standard", "search_analyzer": "standard"},
"asis" : {"type" : "string", "index" : "analyzed",
"index_analyzer": "lowkeyword-analyzer", "search_analyzer": "lowkeyword-analyzer"},
"phonetic" : {"type" : "string", "index" : "analyzed",
"index_analyzer": "phonetic-analyzer", "search_analyzer": "phonetic-analyzer"},
"basename" : {"type" : "string", "index" : "analyzed",
"index_analyzer": "nickname-analyzer", "search_analyzer": "nickname-analyzer"}
}
},
"date" : {
"type" : "date",
"format" : "dateOptionalTime"
}
}
}
}
}'
curl -XPUT "${names_index}" -d "${index_definition}"
# Run a sample string through one of the index's custom analyzers and print
# the tokens the _analyze endpoint returns.
#   $1 - analyzer short name; "<name>-analyzer" is what gets queried
#   $2 - sample text sent as the request body
show_analysis () {
  echo "testing ${1} analyzier"
  curl -XGET "localhost:9200/names-test/_analyze?analyzer=${1}-analyzer&pretty=1" -d "${2}"
  echo
}

# One probe per custom analyzer defined on the index.
show_analysis lowkeyword 'ALAN JURGENSEN'
show_analysis phonetic 'ALEN JORGENSEN'
show_analysis nickname 'BILL ALAN'
#!/bin/sh
# first load : schema-accts-search-exp.sh
# Index one sample person document into names-test (type "doc").
#   $1 - document id
#   $2 - value for the "name" field
#   $3 - value for the "date" field (written as YYYY-MM-DD by the caller)
# Prints curl's response followed by a newline.
_putdata_doc () {
  curl -sXPUT "localhost:9200/names-test/doc/$1" -d "{\"name\": \"$2\", \"date\": \"$3\"}"
  echo
}

# Load ten sample name documents (ids 1001-1010), refresh the index, then
# show a few lookups against the freshly written data.
# Takes no arguments; every response is written to stdout.
# NOTE(review): nothing in the visible script calls putdata, so the data is
# only loaded if it is invoked explicitly - confirm that is intended.
putdata () {
  # Put some new docs
  _putdata_doc 1001 "Joe Johnson" "2013-08-01"
  _putdata_doc 1002 "JOHN JOHNSON" "2013-08-02"
  _putdata_doc 1003 "jeff smith" "2013-08-03"
  _putdata_doc 1004 "Jeff Johnson" "2013-08-04"
  _putdata_doc 1005 "john smith" "2013-08-05"
  _putdata_doc 1006 "ALAN HANSON" "2013-08-06"
  _putdata_doc 1007 "MIKE SMITH" "2013-08-07"
  _putdata_doc 1008 "ALAN JURGENSEN" "2013-08-08"
  _putdata_doc 1009 "ALLEN JORGENSON" "2013-08-09"
  _putdata_doc 1010 "ALLEN JURGENSON" "2013-08-10"

  # Refresh so the documents above are visible to the requests that follow.
  curl -sXPOST "localhost:9200/names-test/_refresh"
  echo

  # Check for existence with a real HEAD request. -I is required here:
  # "-X HEAD" only renames the method, so curl still waits for a body and,
  # combined with -s, prints nothing. -I prints the response headers, whose
  # status line shows whether the document exists.
  curl -sI 'localhost:9200/names-test/doc/1001'
  echo

  # get doc
  curl -sXGET 'localhost:9200/names-test/doc/1008?pretty'
  echo
  curl -sXGET 'localhost:9200/names-test/doc/1009?pretty'
  echo

  # get specific fields (default field is _source)
  curl -sXGET 'localhost:9200/names-test/doc/1008?fields=_timestamp,name&pretty'
  echo
}
# Dump every document in the index, asking for _timestamp next to _source.
match_all_request='{"fields": ["_timestamp","_source"], "query": {"match_all" : {}}}'
curl -sXPOST "localhost:9200/names-test/_search?pretty" -d "${match_all_request}"
echo

# Show the tokens one of the index's custom analyzers produces for a sample.
#   $1 - label used in the banner line
#   $2 - analyzer name passed to the _analyze endpoint
#   $3 - sample text sent as the request body
probe_analyzer () {
  echo "testing ${1} analyzier"
  curl -sXGET "localhost:9200/names-test/_analyze?analyzer=${2}&pretty" -d "${3}"
  echo
}

probe_analyzer lowkeyword lowkeyword-analyzer 'ALAN JURGENSEN'
probe_analyzer phonetic phonetic-analyzer 'ALAN JURGENSEN'
probe_analyzer 'nickname to basename' nickname-analyzer 'BILL ALAN'
# Run a single-field "match" query against the doc type and pretty-print
# the hits.
#   $1 - field to match on (also shown in the banner line)
#   $2 - text to search for
run_name_match () {
  echo "Name match query: ${1}"
  # Build the same multi-line JSON body for every field/text pair.
  match_request=$(printf '{\n"query": {\n"match": {\n"%s": "%s"\n}\n}\n}' "${1}" "${2}")
  curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d "${match_request}"
  echo
}

# One probe per sub-field of the "name" multi_field.
run_name_match "name" "ALAN"
run_name_match "name.asis" "ALAN"
run_name_match "name.asis" "alan jurgensen"
run_name_match "name.phonetic" "alan jurgensen"
run_name_match "name.basename" "allen"
# Run a multi_match query for "alan Jurgensen" across the name sub-fields.
#   $1 - banner line printed before the request
#   $2 - URL query string for _search (e.g. "pretty" or "pretty&explain")
#   $3 - comma-separated, JSON-quoted field list, optionally with ^boost
run_multi_match () {
  echo "${1}"
  # Only the field list varies between runs; the rest of the body is fixed.
  multi_match_request=$(printf '{\n"query": {\n"multi_match": {\n"query": "alan Jurgensen",\n"fields": [ %s ]\n}\n}\n}' "${3}")
  curl -sXPOST "localhost:9200/names-test/doc/_search?${2}" -d "${multi_match_request}"
  echo
}

# No boosts.
run_multi_match \
  'Name multi-match query: name.name name.asis name.phonetic name.basename' \
  'pretty' \
  '"name.name", "name.asis", "name.phonetic", "name.basename"'

# Moderate per-field boosts.
run_multi_match \
  'Name multi-match BOOST query: name.name name.asis name.phonetic name.basename' \
  'pretty' \
  '"name.name^1.6", "name.asis^3", "name.phonetic^1.4", "name.basename^1.2"'

# Larger boosts, with the explain flag added to the request.
run_multi_match \
  'Name multi-match BIGBOOST query: name.name name.asis name.phonetic name.basename' \
  'pretty&explain' \
  '"name.name^4", "name.asis^6", "name.phonetic^3", "name.basename^2"'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment