Created
October 18, 2012 04:06
-
-
Save fmpwizard/3909810 to your computer and use it in GitHub Desktop.
search on description
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ========================================
# Testing n-gram analysis in ElasticSearch
# ========================================
# The goal is to search for terms like
#   key
#   keyboard
#   8
#   8 cell
#   8x DVD
#   CD-RW
#   CDRW
# and find only the documents that contain the matching words: "key" should
# find "keyboard", and "8" should find "Li-ION 8 Cells".
# CDRW and CD-RW should be treated as the same term.
# "8 cell" should find "8 cells", but not "6 cells".
# Start from a clean slate: drop any "test" index left over from a prior run.
curl -X DELETE localhost:9200/test

# (Re)create the "test" index.
#
# Analysis settings:
#   description_tokenizer        - pattern tokenizer; the JSON string "\\s+"
#                                  is the regex \s+ (split on whitespace).
#   ngram_desc                   - nGram filter, min_gram = max_gram = 3.
#   desc_word_filter             - word_delimiter filter with part generation
#                                  and numeric/case-change splitting turned
#                                  off, catenate_words and preserve_original
#                                  turned on (intended to let "CD-RW" also
#                                  match "CDRW").
#   desc_shingle                 - shingle filter producing 2-token shingles
#                                  joined with an empty separator.
#   description_analyzer         - description_tokenizer + ngram_desc,
#                                  desc_word_filter, lowercase (in that order).
#   description_shingle_analyzer - description_tokenizer + desc_shingle,
#                                  lowercase.
#
# Mapping for type "main": "description" is a multi_field whose
# "description" sub-field uses description_analyzer and whose
# "description_shingle" sub-field uses description_shingle_analyzer.
#
# NOTE(review): "multi_field" and "string" look like mapping types from
# older Elasticsearch releases -- confirm against the server version in use.
curl -X PUT localhost:9200/test -d '
{
  "settings" : {
    "index" : {
      "analysis" : {
        "tokenizer" : {
          "description_tokenizer" : {
            "type" : "pattern",
            "pattern" : "\\s+"
          }
        },
        "analyzer" : {
          "description_analyzer" : {
            "type" : "custom",
            "tokenizer" : "description_tokenizer",
            "filter" : [ "ngram_desc", "desc_word_filter", "lowercase"]
          },
          "description_shingle_analyzer" : {
            "type" : "custom",
            "tokenizer" : "description_tokenizer",
            "filter" : [ "desc_shingle", "lowercase"]
          }
        },
        "filter" : {
          "ngram_desc" : {
            "type" : "nGram",
            "min_gram" : 3,
            "max_gram" : 3
          },
          "desc_shingle" : {
            "type": "shingle",
            "min_shingle_size": 2,
            "max_shingle_size": 2,
            "token_separator" : ""
          },
          "desc_word_filter": {
            "type": "word_delimiter",
            "generate_word_parts" : "false",
            "generate_number_parts" : "false",
            "split_on_numerics" : "false",
            "split_on_case_change" : "false",
            "catenate_words" : "true",
            "preserve_original" : "true"
          }
        }
      }
    }
  },
  "mappings": {
    "main": {
      "properties": {
        "description": {
          "type": "multi_field",
          "fields" : {
            "description" : {
              "type" : "string",
              "analyzer": "description_analyzer"
            },
            "description_shingle" : {
              "type" : "string",
              "analyzer": "description_shingle_analyzer"
            }
          }
        }
      }
    }
  }
}
'
#######################################
# Index one sample document into test/main.
# The same text is stored in both "description" and "description2"
# (presumably so the custom-analyzed field can be compared with a field
# that has no explicit mapping -- confirm intent).
# Arguments: $1 - description text; it is interpolated into the JSON body
#                 unescaped, so it must not contain '"' or '\'.
# Outputs:   whatever curl prints (the server response) on stdout.
#######################################
index_doc() {
  local body
  body=$(printf '{\n  "description" : "%s",\n  "description2" : "%s"\n}' "$1" "$1")
  curl -X POST "http://localhost:9200/test/main" -d "${body}"
}

# Sample catalogue entries covering the cases named in the header:
# mixed-case words, hyphenated terms, and numeric prefixes.
index_doc "This keyBoard for Toshiba"
index_doc "This keyboard for HP"
index_doc "This battery li-ion toshiba"
index_doc "This CD-RW/ DVD DRIVE Combo toshiba"
index_doc "This cd-rw DVD DRIVE Combo toshiba"
index_doc "8X DVD Drive"
index_doc "6X DVD Drive"
index_doc "Li-ION 6 Cells"
index_doc "Li-ION 8 Cells"

# Refresh the index before the searches that follow are issued.
curl -X POST "http://localhost:9200/test/_refresh"
# Show the tokens description_analyzer produces for a sample phrase.
curl "http://localhost:9200/test/_analyze?text=8x+mas+menos&analyzer=description_analyzer&pretty=true"

# Query-string searches to try against the index, one URL per line.
# "&pretty=true" is appended to each URL when it is requested.
URLS='
http://localhost:9200/test/_search?q=description:key&default_operator=AND
http://localhost:9200/test/_search?q=description:key%20toshiba&default_operator=AND
http://localhost:9200/test/_search?q=description:toshiba&default_operator=AND
http://localhost:9200/test/_search?q=description:li-ion&default_operator=AND
http://localhost:9200/test/_search?q=description:cdrw&default_operator=AND
http://localhost:9200/test/_search?q=description:cd-rw&default_operator=AND
http://localhost:9200/test/_search?q=description:8x&default_operator=AND
http://localhost:9200/test/_search?q=description2:8x&default_operator=AND
http://localhost:9200/test/_search?q=description2:6+Cells&default_operator=AND
'

# The URL sweep is disabled on purpose. Run the script with
# RUN_URL_QUERIES=1 to enable it. (Previously the loop was "disabled" by
# iterating over the placeholder "nada", which still fired one bogus curl.)
if [ "${RUN_URL_QUERIES:-0}" = "1" ]; then
  # Read URLS line by line instead of word-splitting it, so the "?" in
  # each URL is never subject to pathname expansion.
  while IFS= read -r url; do
    [ -n "${url}" ] || continue   # skip the blank first/last lines
    echo; echo; echo ">>> ${url}&pretty=true"
    # To open the result in a browser instead (macOS):
    # if command -v open > /dev/null 2>&1; then
    #   open "${url}&pretty=true"
    # fi
    curl "${url}&pretty=true"
  done <<EOF
${URLS}
EOF
fi
# Show the tokens description_shingle_analyzer produces for a sample phrase.
curl "http://localhost:9200/test/_analyze?text=6+cell+battery&analyzer=description_shingle_analyzer&pretty=true"

# Bool query combining two "should" match clauses for "6 cell": one on the
# "description" field and one on the "description_shingle" field, with
# coord scoring disabled and minimum_should_match set to 70%.
PAYLOAD='
{"query":{"bool":{"should":
  [
    {
      "match":
      {
        "description":{
          "query":"6 cell","operator":"or"
        }
      }
    },
    {
      "match":
      {
        "description_shingle":{
          "query":"6 cell","operator":"or"
        }
      }
    }
  ],
  "disable_coord" : true,
  "minimum_should_match" : "70%"
}}}
'

# Visual separator between the analyzer output and the search result.
printf '\n>>>\n\n'
curl -XGET "http://localhost:9200/test/_search?pretty=true" -d "${PAYLOAD}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment