Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Elasticsearch: is having multiple tokens at the same position a problem?
#!/bin/sh
#
# Elasticsearch reproduction: search-time synonyms that emit several tokens
# at the same position.
#
# What it does, in order:
#   1. Prints the server banner (version info).
#   2. Deletes and recreates index "i". Its "text" field is indexed with a
#      standard-tokenizer + lowercase analyzer and searched with the same
#      chain plus a synonym filter ("fast, quick").
#   3. Shows what the search analyzer produces for "fast".
#   4. Indexes the single document "Quick brown fox" and refreshes.
#   5. Runs several query_string and match queries so their hit counts can
#      be compared side by side.
#
# Usage:
#   sh SCRIPT                        # talks to localhost:9200
#   ES_HOST=otherhost:9200 sh SCRIPT
#
# WARNING: this deletes any existing index named "i" on the target node.

# Host:port of the Elasticsearch node; override through the environment.
ES_HOST="${ES_HOST:-localhost:9200}"

# Every request after the banner is scoped to this index, so documents in
# other indices on the same node cannot leak into the search results.
INDEX_URL="${ES_HOST}/i"

# Emit the two newlines used to separate one step's output from the next
# (curl responses do not necessarily end with a newline).
gap() {
  echo
  echo
}

# Print a heading, run one search against the test index, then a separator.
#   $1 - heading to print before the response
#   $2 - JSON request body
run_search() {
  echo "$1"
  curl -X GET "${INDEX_URL}/_search" -d "$2"
  gap
}

echo "Elasticsearch version"
curl "${ES_HOST}"
gap

echo "Delete index"
curl -X DELETE "${INDEX_URL}"
gap

echo "Create index with analysis and mappings"
curl -X PUT "${INDEX_URL}" -d '{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "index" : {
          "type" : "custom",
          "tokenizer" : "standard",
          "filter" : ["lowercase"]
        },
        "search" : {
          "type" : "custom",
          "tokenizer" : "standard",
          "filter" : ["lowercase","synonym"]
        }
      },
      "filter" : {
        "synonym" : {
          "type" : "synonym",
          "synonyms" : [
            "fast, quick"
          ]
        }
      }
    }
  },
  "mappings" : {
    "t" : {
      "properties" : {
        "text" : {
          "type" : "string",
          "index_analyzer" : "index",
          "search_analyzer" : "search"
        }
      }
    }
  }
}'
gap

# Wait (up to 5s) for the new index's shards to be allocated. The check is
# limited to index "i" so unrelated indices cannot affect the wait.
curl -s -X GET "${ES_HOST}/_cluster/health/i?wait_for_status=yellow&timeout=5s" > /dev/null

echo "Test synonyms for 'fast': should output two tokens"
curl -X POST "${INDEX_URL}/_analyze?analyzer=search&format=text&text=fast"
gap

echo "Index data: 'Quick brown fox'"
curl -X POST "${INDEX_URL}/t" -d '{
  "text" : "Quick brown fox"
}'
gap

# Make the freshly indexed document visible to the searches below.
echo "Refresh Lucene reader"
curl -X POST "${INDEX_URL}/_refresh"
gap

echo "Testing search"
echo ===========================

run_search "1) query_string: quick" \
  '{"query":{"query_string":{"query":"quick","default_field":"text"}}}'

run_search "2) query_string: fast - is search_analyzer used?" \
  '{"query":{"query_string":{"query":"fast","default_field":"text"}}}'

run_search "2.5) query_string: fast - forcing search_analyzer" \
  '{"query":{"query_string":{"query":"fast","default_field":"text","analyzer":"search"}}}'

run_search "3) query_string: fast - forcing search_analyzer, forcing AND operator" \
  '{"query":{"query_string":{"query":"fast","default_field":"text","analyzer":"search","default_operator":"AND"}}}'

run_search "4) match query: quick" \
  '{"query":{"match":{"text":{"query":"quick","analyzer":"search"}}}}'

run_search "5) match query: fast" \
  '{"query":{"match":{"text":{"query":"fast","analyzer":"search"}}}}'

run_search "6) match query: fast - forcing AND operator" \
  '{"query":{"match":{"text":{"query":"fast","analyzer":"search","operator":"AND"}}}}'
@lukas-vlcek

This comment has been minimized.

Copy link
Owner Author

commented Oct 10, 2013

The output is the following:


Elasticsearch version
{
  "ok" : true,
  "status" : 200,
  "name" : "The Wink",
  "version" : {
    "number" : "0.90.5",
    "build_hash" : "c8714e8e0620b62638f660f6144831792b9dedee",
    "build_timestamp" : "2013-09-17T12:50:20Z",
    "build_snapshot" : false,
    "lucene_version" : "4.4"
  },
  "tagline" : "You Know, for Search"
}

Delete index
{"ok":true,"acknowledged":true}

Create index with analysis and mappings
{"ok":true,"acknowledged":true}

Test synonyms for 'fast': should output two tokens
{"tokens":"[fast:0->4:SYNONYM]\n[quick:0->4:SYNONYM]\n"}

Index data: 'Quick brown fox'
{"ok":true,"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_version":1}

Refresh Lucene reader
{"ok":true,"_shards":{"total":10,"successful":5,"failed":0}}

Testing search
===========================
1) query_string: quick
{"took":4,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.15342641,"hits":[{"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_score":0.15342641, "_source" : {
  "text" : "Quick brown fox"
}}]}}

2) query_string: fast - is search_analyzer used?
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}

2.5) query_string: fast - forcing search_analyzer
{"took":3,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.04500804,"hits":[{"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_score":0.04500804, "_source" : {
  "text" : "Quick brown fox"
}}]}}

3) query_string: fast - forcing search_analyzer, forcing AND operator
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.04500804,"hits":[{"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_score":0.04500804, "_source" : {
  "text" : "Quick brown fox"
}}]}}


4) match query: quick
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.04500804,"hits":[{"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_score":0.04500804, "_source" : {
  "text" : "Quick brown fox"
}}]}}

5) match query: fast
{"took":3,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.04500804,"hits":[{"_index":"i","_type":"t","_id":"0N2FX_vxR5qsMTYczFPl1w","_score":0.04500804, "_source" : {
  "text" : "Quick brown fox"
}}]}}

6) match query: fast - forcing AND operator
{"took":4,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.