Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@fmpwizard
Created October 18, 2012 04:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fmpwizard/3909810 to your computer and use it in GitHub Desktop.
Save fmpwizard/3909810 to your computer and use it in GitHub Desktop.
search on description
# ========================================
# Testing n-gram analysis in ElasticSearch
# ========================================
# The goal is to search for terms like
# key
# keyboard
# 8
# 8 cell
# 8x DVD
# CD-RW
# CDRW
# Only documents containing the matching words should be returned:
# "key" should find "keyboard", and "8" should find "Li-ION 8 Cells".
# CDRW and CD-RW should be treated as the same term.
# "8 cell" should find "8 cells", but not "6 cells".
# Drop any existing "test" index first so the PUT below always creates it
# fresh with the settings and mapping defined here.
curl -X DELETE localhost:9200/test

# Create the "test" index.
#
# Analysis settings:
#   description_tokenizer        - pattern tokenizer splitting on whitespace runs.
#   description_analyzer         - description_tokenizer, then the filters
#                                  ngram_desc (3-grams), desc_word_filter
#                                  (word_delimiter) and lowercase.
#   description_shingle_analyzer - description_tokenizer, then desc_shingle
#                                  (2-word shingles, empty separator) and
#                                  lowercase.
#
# Mapping (type "main"): "description" is a multi_field whose "description"
# sub-field uses description_analyzer and whose "description_shingle"
# sub-field uses description_shingle_analyzer.
#
# NOTE(review): the filter list is ordered, so description_analyzer builds
# n-grams before word_delimiter and lowercase run - confirm this order is
# intended given the CDRW / CD-RW goal stated in the header.
#
# The body is JSON, so the Content-Type is set explicitly; without -H, curl
# labels -d data as application/x-www-form-urlencoded.
curl -X PUT localhost:9200/test -H 'Content-Type: application/json' -d '
{
"settings" : {
"index" : {
"analysis" : {
"tokenizer" :{
"description_tokenizer" : {
"type" : "pattern",
"pattern" : "\\s+"
}
},
"analyzer" : {
"description_analyzer" : {
"type" : "custom",
"tokenizer" : "description_tokenizer",
"filter" : [ "ngram_desc", "desc_word_filter", "lowercase"]
},
"description_shingle_analyzer" : {
"type" : "custom",
"tokenizer" : "description_tokenizer",
"filter" : [ "desc_shingle", "lowercase"]
}
},
"filter" : {
"ngram_desc" : {
"type" : "nGram",
"min_gram" : 3,
"max_gram" : 3
},
"desc_shingle" : {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 2,
"token_separator" : ""
},
"desc_word_filter": {
"type": "word_delimiter",
"generate_word_parts" : "false",
"generate_number_parts" : "false",
"split_on_numerics" : "false",
"split_on_case_change" : "false",
"catenate_words" : "true",
"preserve_original" : "true"
}
}
}
}
},
"mappings": {
"main": {
"properties": {
"description": {
"type": "multi_field",
"fields" : {
"description" : {
"type" : "string",
"analyzer": "description_analyzer"
},
"description_shingle" : {
"type" : "string",
"analyzer": "description_shingle_analyzer"
}
}
}
}
}
}
}
'
#######################################
# Index one sample document into test/main.
# The same text is stored under both "description" (the mapped multi_field)
# and "description2" (a field with no explicit mapping in this script), so
# searches against the two fields can be compared.
# Arguments: $1 - description text. It is inserted into the JSON body as-is,
#                 so it must not contain double quotes or backslashes.
# Outputs:   whatever curl prints for the request.
#######################################
index_description() {
  local text=$1
  local body
  # Leading newline and line layout match a hand-written multi-line payload.
  body=$(printf '\n{\n"description" : "%s",\n"description2" : "%s"\n}' \
    "${text}" "${text}")
  # The body is JSON; without -H, curl labels -d data as
  # application/x-www-form-urlencoded.
  curl -X POST "http://localhost:9200/test/main" \
    -H 'Content-Type: application/json' \
    -d "${body}"
}

# Sample documents, indexed in this order.
index_description "This keyBoard for Toshiba"
index_description "This keyboard for HP"
index_description "This battery li-ion toshiba"
index_description "This CD-RW/ DVD DRIVE Combo toshiba"
index_description "This cd-rw DVD DRIVE Combo toshiba"
index_description "8X DVD Drive"
index_description "6X DVD Drive"
index_description "Li-ION 6 Cells"
index_description "Li-ION 8 Cells"

# Refresh the index so the documents above are visible to the searches
# that follow.
curl -X POST "http://localhost:9200/test/_refresh"
# Ask the index which tokens description_analyzer produces for the sample
# text "8x mas menos" (spaces are sent as "+" in the query string).
analyze_endpoint='http://localhost:9200/test/_analyze'
analyze_query='text=8x+mas+menos&analyzer=description_analyzer&pretty=true'
curl "${analyze_endpoint}?${analyze_query}"
# Query-string searches to try against the test index, one URL per line.
URLS='
http://localhost:9200/test/_search?q=description:key&default_operator=AND
http://localhost:9200/test/_search?q=description:key%20toshiba&default_operator=AND
http://localhost:9200/test/_search?q=description:toshiba&default_operator=AND
http://localhost:9200/test/_search?q=description:li-ion&default_operator=AND
http://localhost:9200/test/_search?q=description:cdrw&default_operator=AND
http://localhost:9200/test/_search?q=description:cd-rw&default_operator=AND
http://localhost:9200/test/_search?q=description:8x&default_operator=AND
http://localhost:9200/test/_search?q=description2:8x&default_operator=AND
http://localhost:9200/test/_search?q=description2:6+Cells&default_operator=AND
'

#######################################
# Run every search listed in URLS and pretty-print each response.
# Globals:   URLS (read) - newline-separated list of search URLs.
# Outputs:   for each URL, two blank lines, a ">>> <url>&pretty=true" banner,
#            then whatever curl prints for that request.
#######################################
run_url_searches() {
  local url
  # Read line by line rather than word-splitting ${URLS}: the URLs contain
  # "?" which an unquoted expansion would treat as a glob character.
  while IFS= read -r url; do
    # URLS starts and ends with a newline; skip the resulting empty lines.
    [ -n "${url}" ] || continue
    echo; echo; echo ">>> ${url}&pretty=true"
    # Optionally also open the URL with "open" when that command exists:
    #if command -v open > /dev/null 2>&1; then
    # open "${url}&pretty=true"
    #fi
    curl "${url}&pretty=true"
  done <<EOF
${URLS}
EOF
}

# The URL searches are disabled on purpose. They stay off unless the caller
# opts in with RUN_URL_SEARCHES=1, so no request is made by default.
if [ "${RUN_URL_SEARCHES:-0}" = "1" ]; then
  run_url_searches
fi
# Ask the index which tokens description_shingle_analyzer produces for the
# sample text "6 cell battery".
curl "http://localhost:9200/test/_analyze?text=6+cell+battery&analyzer=description_shingle_analyzer&pretty=true"

# Search body: a bool query with two "should" match clauses for "6 cell",
# one against "description" and one against "description_shingle", both
# with operator "or". Coord scoring is disabled and minimum_should_match
# is set to "70%".
PAYLOAD='
{"query":{"bool":{"should":
[
{
"match":
{
"description":{
"query":"6 cell","operator":"or"
}
}
},
{
"match":
{
"description_shingle":{
"query":"6 cell","operator":"or"
}
}
}
],
"disable_coord" : true,
"minimum_should_match" : "70%"
}}}
'

# Visual separator before the search response: blank line, ">>>", blank line.
printf '\n>>>\n\n'

# The search body is JSON; without -H, curl labels -d data as
# application/x-www-form-urlencoded. -XGET keeps the request a GET even
# though a body is attached.
curl -XGET "http://localhost:9200/test/_search?pretty=true" \
  -H 'Content-Type: application/json' \
  -d "${PAYLOAD}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment