Skip to content

Instantly share code, notes, and snippets.

@jeffsteinmetz
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffsteinmetz/2ea8329c667386c80fae to your computer and use it in GitHub Desktop.
Save jeffsteinmetz/2ea8329c667386c80fae to your computer and use it in GitHub Desktop.
Determine list [array] size in elasticsearch issue
# tested against elasticsearch 1.4.1
# groovy script does not accurately count the number of elements in a list
# if you don't add a mapping with "index": "not_analyzed" it is even worse
# Name of the scratch index this script deletes and re-creates, and the host
# the requests are sent to (port 9200 is hard-coded in every URL).
INDEX_NAME='list-count-test'
NODE='localhost'

# Delete the index first so each run starts from an empty index.
# The URL is a single double-quoted word so the expansions can never be
# word-split or globbed.
curl -XDELETE "http://${NODE}:9200/${INDEX_NAME}"

# Re-create the index with one shard and one replica.
curl -XPUT "http://${NODE}:9200/${INDEX_NAME}/" -d '{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 1
}
}
}'
# set mapping: declare "titles" on type "post" as a string that is indexed
# without analysis ("not_analyzed").
# The URL is one double-quoted word so $NODE / $INDEX_NAME cannot be
# word-split or globbed.
curl -XPOST "http://${NODE}:9200/${INDEX_NAME}/post/_mapping" -d '{
"post": {
"properties": {
"titles": { "type": "string", "index": "not_analyzed" }
}
}
}'
# insert some test data

#######################################
# POST one JSON document to the "post" type of the test index.
# Globals:   NODE, INDEX_NAME (read)
# Arguments: $1 - JSON document body, sent verbatim with curl -d
# Outputs:   whatever curl prints (the server response) on stdout
# Returns:   curl's exit status
#######################################
index_doc() {
  curl -XPOST "http://${NODE}:9200/${INDEX_NAME}/post" -d "$1"
}

# Three entries, two of them identical.
index_doc '{
"titles": ["one", "duplicate", "duplicate"]
}'
# Two distinct entries.
index_doc '{
"titles": ["one", "two"]
}'
# Four entries, some containing spaces.
index_doc '{
"titles": ["one", "with spaces", "more spaces","four"]
}'
# Two entries containing slashes.
index_doc '{
"titles": ["this/has/slashes", "more/slashes"]
}'
# Four URL entries, two of them identical.
index_doc '{
"titles": ["http://bit.ly/abc", "http://bit.ly/abc", "http://bit.ly/def", "http://bit.ly/ghi"]
}'
# A single entry.
index_doc '{
"titles": ["one"]
}'
# show the mapping (disabled; uncomment to inspect what was stored)
#curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_mapping"

# Print a separator, then pause before searching.
# Use the lower-case `echo` builtin: upper-case `ECHO` only resolves on
# case-insensitive filesystems and is "command not found" elsewhere.
echo
echo '-------------'
echo ' waiting 6 seconds for index to refresh '
# NOTE(review): a fixed sleep assumes the index refreshes within 6 seconds;
# an explicit refresh request would be deterministic - confirm before changing.
sleep 6
# TEST 1: script filter on the number of values in "titles".
# This should return only records with exactly 2 titles in the list,
# but it also returns an incorrect item:
# "titles": ["one", "duplicate", "duplicate"]
# The author's notes add that it does not return, as a doc with 2 items,
# "titles": ["http://bit.ly/abc", "http://bit.ly/abc"]
# NOTE(review): this script never inserts such a two-item bit.ly document
# (the bit.ly test document has four entries), so that remark looks stale -
# confirm against the data actually indexed.
#
# `echo` is lower-case on purpose: `ECHO` is not a command on case-sensitive
# systems. The URL is one double-quoted word so the expansions cannot be
# word-split or globbed.
echo 'TEST 1, should only return documents with 2 items in titles list [array]'
echo
# Inside the single-quoted body, '\'' yields a literal single quote, so the
# script sent to the server reads: doc['titles'].values.size() == 2
curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_search?pretty=true" -d '
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match_all": {}
}
]
}
},
"filter": {
"script": {
"script": "doc['\'titles\''].values.size() == 2"
}
}
}
}
}'
# TEST 2: return up to 20 documents, each with its "titles" source field and
# a script field "sizeFromScript" holding doc['titles'].values.size(), so the
# computed size can be compared with the stored list.
#
# `echo` is lower-case on purpose: `ECHO` is not a command on case-sensitive
# systems. The URL is one double-quoted word so the expansions cannot be
# word-split or globbed.
echo 'TEST 2'
echo
# Inside the single-quoted body, '\'' yields a literal single quote, so the
# script sent to the server reads: doc['titles'].values.size()
curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_search?pretty=true" -d '
{
"size": 20,
"script_fields": {
"sizeFromScript": {
"script": "doc['\'titles\''].values.size()"
}
},
"query": {
"bool": {
"must": [
{
"match_all": {}
}
]
}
},
"_source": {
"includes": [
"titles"
]
}
}'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment