Created
September 26, 2013 20:43
-
-
Save jurgynet/6720252 to your computer and use it in GitHub Desktop.
Elasticsearch name testing: standard, asis, phonetic, nickname
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Index setup script for the "names-test" index on localhost:9200.
#
# Step 1: delete any previous version of the index so that the settings and
# mappings sent by the following PUT are applied to a fresh index.
curl -XDELETE "localhost:9200/names-test/"
# Blank separator line after the response printed by curl.
echo
# Step 2: create the "names-test" index with custom analysis and mappings.
#
# Analyzers defined in the payload:
#   lowkeyword-analyzer : keyword tokenizer + lowercase filter
#   phonetic-analyzer   : whitespace tokenizer + standard, lowercase and
#                         dmeta-filter (type "phonetic", encoder
#                         "doublemetaphone", replace = true)
#   nickname-analyzer   : whitespace tokenizer + onetoken-filter (type "limit",
#                         max_token_count = 1), lowercase and nickname-filter
#                         (type "synonym", read from synonyms_path)
#
# Mappings defined in the payload:
#   _default_ : _timestamp enabled and stored
#   doc       : "name" is a multi_field with sub-fields name (standard),
#               asis (lowkeyword-analyzer), phonetic (phonetic-analyzer) and
#               basename (nickname-analyzer); "date" uses dateOptionalTime.
#
# The "phonetic" filter type presumably requires the plugin below to be
# installed, and the synonyms file must exist at the configured path.
# See https://github.com/elasticsearch/elasticsearch-analysis-phonetic
curl -XPUT "localhost:9200/names-test" -d '{
  "settings": {
    "analysis": {
      "analyzer": {
        "lowkeyword-analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter" : ["lowercase"]
        },
        "phonetic-analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter" : ["standard", "lowercase", "dmeta-filter"]
        },
        "nickname-analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter" : ["onetoken-filter", "lowercase", "nickname-filter"]
        }
      },
      "filter" : {
        "dmeta-filter" : {
          "type" : "phonetic",
          "encoder" : "doublemetaphone",
          "replace" : true
        },
        "onetoken-filter" : {
          "type" : "limit",
          "max_token_count" : 1
        },
        "nickname-filter" : {
          "type" : "synonym",
          "synonyms_path" : "../config/analysis/nick2basename.txt"
        }
      }
    }
  },
  "mappings": {
    "_default_": {
      "_timestamp" : {
        "enabled" : true,
        "store" : true
      }
    },
    "doc": {
      "properties": {
        "name" : {
          "type" : "multi_field",
          "fields" : {
            "name" : {"type" : "string", "index" : "analyzed",
              "index_analyzer": "standard", "search_analyzer": "standard"},
            "asis" : {"type" : "string", "index" : "analyzed",
              "index_analyzer": "lowkeyword-analyzer", "search_analyzer": "lowkeyword-analyzer"},
            "phonetic" : {"type" : "string", "index" : "analyzed",
              "index_analyzer": "phonetic-analyzer", "search_analyzer": "phonetic-analyzer"},
            "basename" : {"type" : "string", "index" : "analyzed",
              "index_analyzer": "nickname-analyzer", "search_analyzer": "nickname-analyzer"}
          }
        },
        "date" : {
          "type" : "date",
          "format" : "dateOptionalTime"
        }
      }
    }
  }
}'
# Step 3: sanity-check each custom analyzer by running sample text through the
# index's _analyze endpoint and pretty-printing the response.
echo 'testing lowkeyword analyzer'
curl -XGET 'localhost:9200/names-test/_analyze?analyzer=lowkeyword-analyzer&pretty=1' -d 'ALAN JURGENSEN'
echo
echo 'testing phonetic analyzer'
# Deliberately misspelled input ("ALEN JORGENSEN") for the phonetic analyzer.
curl -XGET 'localhost:9200/names-test/_analyze?analyzer=phonetic-analyzer&pretty=1' -d 'ALEN JORGENSEN'
echo
echo 'testing nickname analyzer'
# Two-word input; nickname-analyzer's onetoken-filter is configured with
# max_token_count = 1 in the index settings.
curl -XGET 'localhost:9200/names-test/_analyze?analyzer=nickname-analyzer&pretty=1' -d 'BILL ALAN'
echo
#!/bin/sh | |
# first load : schema-accts-search-exp.sh | |
# putdata: load the sample documents into names-test/doc and read a few back.
#
# Arguments: none.
# Outputs:   the curl responses on stdout, each followed by a blank line.
# Side effects: PUTs documents 1001-1010, POSTs an index refresh, then issues
#   one HEAD and three GET requests against localhost:9200.
# Note: doc_id, doc_name and doc_date are plain (global) shell variables,
#   because `local` is not part of POSIX sh and this script uses #!/bin/sh.
putdata () {
  # Put some new docs: one "id|name|date" record per line of the here-document.
  # The JSON body sent for each record is {"name": "<name>", "date": "<date>"}.
  while IFS='|' read -r doc_id doc_name doc_date; do
    curl -sXPUT "localhost:9200/names-test/doc/${doc_id}" \
      -d "{\"name\": \"${doc_name}\", \"date\": \"${doc_date}\"}"
    echo
  done <<'EOF'
1001|Joe Johnson|2013-08-01
1002|JOHN JOHNSON|2013-08-02
1003|jeff smith|2013-08-03
1004|Jeff Johnson|2013-08-04
1005|john smith|2013-08-05
1006|ALAN HANSON|2013-08-06
1007|MIKE SMITH|2013-08-07
1008|ALAN JURGENSEN|2013-08-08
1009|ALLEN JORGENSON|2013-08-09
1010|ALLEN JURGENSON|2013-08-10
EOF

  # Refresh the index after the writes (so the documents just written can be
  # found by the searches that follow).
  curl -sXPOST "localhost:9200/names-test/_refresh"
  echo

  # Check for existence with a HEAD request. -I is used instead of -X HEAD:
  # -X only changes the method word and leaves curl waiting for a response
  # body, whereas -I performs a proper HEAD and prints the response headers.
  curl -sI 'localhost:9200/names-test/doc/1001'
  echo

  # Get two full documents.
  curl -sXGET 'localhost:9200/names-test/doc/1008?pretty'
  echo
  curl -sXGET 'localhost:9200/names-test/doc/1009?pretty'
  echo

  # Get specific fields (default field is _source).
  curl -sXGET 'localhost:9200/names-test/doc/1008?fields=_timestamp,name&pretty'
  echo
}
# Search the whole index with match_all, returning _timestamp and _source for
# every hit.
# NOTE(review): putdata is defined above but is not called anywhere in this
# script as shown; run it once after creating the index, otherwise this and the
# following searches have no sample documents to match.
curl -sXPOST "localhost:9200/names-test/_search?pretty" -d '{"fields": ["_timestamp","_source"], "query": {"match_all" : {}}}'
echo
# Show how each custom analyzer tokenizes sample input, using the index's
# _analyze endpoint with pretty-printed output.
echo 'testing lowkeyword analyzer'
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=lowkeyword-analyzer&pretty' -d 'ALAN JURGENSEN'
echo
echo 'testing phonetic analyzer'
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=phonetic-analyzer&pretty' -d 'ALAN JURGENSEN'
echo
echo 'testing nickname to basename analyzer'
curl -sXGET 'localhost:9200/names-test/_analyze?analyzer=nickname-analyzer&pretty' -d 'BILL ALAN'
echo
# Single-field "match" queries against names-test/doc, one per sub-field of
# the "name" multi_field. Each query prints a label, the pretty-printed search
# response, and a blank separator line.

# Default "name" field, upper-case first name.
echo 'Name match query: name'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "match": {
      "name": "ALAN"
    }
  }
}'
echo

# "name.asis" sub-field, first name only.
echo 'Name match query: name.asis'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "match": {
      "name.asis": "ALAN"
    }
  }
}'
echo

# "name.asis" sub-field again, this time with the full name in lower case.
echo 'Name match query: name.asis'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "match": {
      "name.asis": "alan jurgensen"
    }
  }
}'
echo

# "name.phonetic" sub-field, full name.
echo 'Name match query: name.phonetic'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "match": {
      "name.phonetic": "alan jurgensen"
    }
  }
}'
echo

# "name.basename" sub-field, first-name variant "allen".
echo 'Name match query: name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "match": {
      "name.basename": "allen"
    }
  }
}'
echo
# "multi_match" queries for "alan Jurgensen" across all four sub-fields of
# "name". The three variants differ only in the per-field ^boost suffixes
# (none, moderate, large); the last one also passes the "explain" URL
# parameter (presumably to include scoring details in the response).

# No boosts.
echo 'Name multi-match query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "multi_match": {
      "query": "alan Jurgensen",
      "fields": [ "name.name", "name.asis", "name.phonetic", "name.basename" ]
    }
  }
}'
echo

# Moderate boosts; name.asis is weighted highest.
echo 'Name multi-match BOOST query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty" -d '{
  "query": {
    "multi_match": {
      "query": "alan Jurgensen",
      "fields": [ "name.name^1.6", "name.asis^3", "name.phonetic^1.4", "name.basename^1.2" ]
    }
  }
}'
echo

# Larger boosts in the same field order, with explain enabled.
echo 'Name multi-match BIGBOOST query: name.name name.asis name.phonetic name.basename'
curl -sXPOST "localhost:9200/names-test/doc/_search?pretty&explain" -d '{
  "query": {
    "multi_match": {
      "query": "alan Jurgensen",
      "fields": [ "name.name^4", "name.asis^6", "name.phonetic^3", "name.basename^2" ]
    }
  }
}'
echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment