Created
October 26, 2011 17:55
-
-
Save ppearcy/1317149 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduction script for synonym-filter behaviour in Elasticsearch.
#
# Steps:
#   1. Drop and recreate the "syntest" index with two custom analyzers:
#        - syntest_analyzer: standard/lowercase/synonym/stop/snowball
#        - dr_no_syn:        the same chain without the synonym filter
#   2. Index three one-word documents (atripla, viread, truvada).
#   3. Run the same query_string search twice: once with the field's own
#      analyzer and once forcing the synonym-free analyzer.
#
# Environment:
#   ES_URL - base URL of the Elasticsearch node
#            (default: http://localhost:9200)
ES_URL="${ES_URL:-http://localhost:9200}"
ES_URL="${ES_URL%/}" # tolerate a trailing slash in a user-supplied value

# Start from a clean slate. On a first run the index does not exist yet, so
# an error response from this call is expected and harmless.
curl -XDELETE "${ES_URL}/syntest/"

# Create the index: a single shard, the two analyzers, and a mapping that
# stores term vectors with positions and offsets for "attachmentcontent".
curl -XPOST "${ES_URL}/syntest" -d '{
  "settings" : {
    "number_of_shards" : 1,
    "analysis" : {
      "analyzer" : {
        "syntest_analyzer" : {
          "type" : "custom",
          "tokenizer" : "standard",
          "filter" : ["standard", "lowercase", "syntest_synonym", "stop", "english_snowball"]
        },
        "dr_no_syn" : {
          "type" : "custom",
          "tokenizer" : "standard",
          "filter" : ["standard", "lowercase", "stop", "english_snowball"]
        }
      },
      "filter" : {
        "syntest_synonym" : {
          "type" : "synonym",
          "synonyms" : [
            "atripla, efavirenz emtricitabine, efavirenz/emtricitabine",
            "emtricitabine tenofovir, emtricitabine/tenofovir, truvada",
            "tenofovir disoproxil, viread"
          ],
          "ignore_case" : true,
          "expand" : true,
          "tokenizer" : "standard"
        },
        "english_snowball" : { "type" : "snowball", "language" : "English" }
      }
    }
  },
  "mappings" : {
    "syntest" : {
      "properties" : {
        "attachmentcontent" : { "type" : "string", "omit_norms" : true, "include_in_all" : false, "analyzer" : "syntest_analyzer", "term_vector" : "with_positions_offsets" }
      }
    }
  }
}'

# Index one document per drug name.
curl -XPUT "${ES_URL}/syntest/syntest/1" -d '{
  "attachmentcontent" : "atripla"
}'
curl -XPUT "${ES_URL}/syntest/syntest/2" -d '{
  "attachmentcontent" : "viread"
}'
curl -XPUT "${ES_URL}/syntest/syntest/3" -d '{
  "attachmentcontent" : "truvada"
}'

# Make the documents searchable before querying; without an explicit refresh
# the searches below can run before the new documents are visible.
curl -XPOST "${ES_URL}/syntest/_refresh"

# Running the query below shows a couple of things:
# 1) Highlight offsets appear to be wrong when a synonym spans multiple terms.
# 2) Synonym expansion happens at query time as well as at index time, and
#    the query-side expansion ends up polluting the results.
# The URL is quoted so the shell never treats "?" as a glob character.
curl -XGET "${ES_URL}/syntest/_search?pretty=True" -d '{
  "explain" : true,
  "from" : 0,
  "size" : 10,
  "query" : {
    "query_string" : {
      "query" : "((((attachmentcontent:(truvada)))))",
      "default_operator" : "or"
    }
  },
  "highlight" : {
    "pre_tags" : ["<Highlight>"],
    "post_tags" : ["</Highlight>"],
    "fields" : {
      "attachmentcontent" : { "fragment_size" : 300, "number_of_fragments" : 1, "fragment_offset" : 100 }
    }
  }
}'

# Explicitly setting the query analyzer to the one without synonyms shows the
# more correct synonym behavior.
curl -XGET "${ES_URL}/syntest/_search?pretty=True" -d '{
  "explain" : true,
  "from" : 0,
  "size" : 10,
  "query" : {
    "query_string" : {
      "analyzer" : "dr_no_syn",
      "query" : "((((attachmentcontent:(truvada)))))",
      "default_operator" : "or"
    }
  },
  "highlight" : {
    "pre_tags" : ["<Highlight>"],
    "post_tags" : ["</Highlight>"],
    "fields" : {
      "attachmentcontent" : { "fragment_size" : 300, "number_of_fragments" : 1, "fragment_offset" : 100 }
    }
  }
}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment