-
-
Save P-Hill/4035647 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Setup, step 1: remove any test-idx index left over from an earlier run
# before the mappings and documents are (re)created further down.
echo 'delete any existing test-idx index'
curl -XDELETE 'http://localhost:9200/test-idx'
# Blank lines separate the server response from the next step's banner.
echo
echo
# Setup, step 2: create test-idx with a strict mapping for type "document":
#   title - analyzed string
#   acl   - not_analyzed string
#   tags  - nested objects, each with words (analyzed), last (not_analyzed)
#           and counter (long)
# All JSON keys are quoted so the payload is valid strict JSON.
# NOTE(review): "stored" and "all_enabled" are kept exactly as written; the
# usual Elasticsearch spellings appear to be "store" and "_all" - confirm
# whether these settings are actually taking effect.
echo 'define mappings ...'
curl -XPUT 'http://localhost:9200/test-idx' -d '{
  "mappings": {
    "document": {
      "_source": { "enabled": true },
      "_all": { "enabled": false },
      "dynamic": "strict",
      "properties": {
        "title": {
          "type": "string", "stored": true, "index": "analyzed"
        },
        "acl": {
          "type": "string", "stored": true, "index": "not_analyzed"
        },
        "tags": {
          "type": "nested",
          "include_in_parent": false,
          "all_enabled": false,
          "dynamic": "strict",
          "properties": {
            "words": {
              "type": "string",
              "index": "analyzed",
              "stored": true
            },
            "last": {
              "type": "string",
              "index": "not_analyzed",
              "stored": true
            },
            "counter": {
              "type": "long",
              "stored": true
            }
          }
        }
      }
    }
  }
}'
# Index document 1: its ACL lists ME, MY_FRIENDS and MY_RELATIVES, and its
# three nested tags (last = CCC, BBB, AAA) all carry counter 10.
echo
echo "define document #1 accessable by me"
curl -XPUT 'http://localhost:9200/test-idx/document/1' -d '{
  "title": "Accessable #1 (by MY_RELATIVES or MY_FRIENDS) with the right tags.",
  "acl": [ "ME", "MY_FRIENDS", "MY_RELATIVES" ],
  "tags": [
    { "words": "AAA BBB CCC", "last": "CCC", "counter": "10" },
    { "words": "AAA BBB",     "last": "BBB", "counter": "10" },
    { "words": "AAA",         "last": "AAA", "counter": "10" }
  ]
}'
# Index document 2: its ACL lists GEORGE, MY_FRIENDS and MY_RELATIVES, and
# its four nested tags (last = CCC, BBB, AAA, DDD) all carry counter 8.
echo
echo
echo "define a document accessable by ME (#2) that includes the right tags."
curl -XPUT 'http://localhost:9200/test-idx/document/2' -d '{
  "title": "Accessable #2 (MY_FRIENDS) with matching tags",
  "acl": [ "GEORGE", "MY_FRIENDS", "MY_RELATIVES" ],
  "tags": [
    { "words": "DDD AAA BBB CCC", "last": "CCC", "counter": "8" },
    { "words": "DDD AAA BBB",     "last": "BBB", "counter": "8" },
    { "words": "DDD AAA",         "last": "AAA", "counter": "8" },
    { "words": "DDD",             "last": "DDD", "counter": "8" }
  ]
}'
# Index document 3: its ACL shares no entry with the ACL list used by the
# search queries, and its three nested tags (XXX/YYY/ZZZ) carry counter 11.
echo
echo
echo 'define a document accessable by you, but not me.'
curl -XPUT 'http://localhost:9200/test-idx/document/3' -d '{
  "title": "Not accessable",
  "acl": [ "YOU", "YOUR_FRIENDS", "SUES_FRIENDS", "BILLS_RELATIVES" ],
  "tags": [
    { "words": "XXX YYY ZZZ", "last": "ZZZ", "counter": "11" },
    { "words": "XXX YYY",     "last": "YYY", "counter": "11" },
    { "words": "XXX",         "last": "XXX", "counter": "11" }
  ]
}'
# Index document 4: reachable through MY_RELATIVES, but its tags only
# contain CCC (never AAA), so it must not satisfy the "aaa AND ccc" nested
# query. Its counter of 12 is the highest in the data set, which makes a
# wrongly included document easy to spot in the facet's max value.
echo
echo
echo 'define an extra doc accessable via MY_RELATIVES, but w/ one but not both words in the words field.'
curl -XPUT 'http://localhost:9200/test-idx/document/4' -d '{
  "title": "Accessable #3 (MY_NEIGHBORS), but not the right tags",
  "acl": [ "MY_NEIGHBORS", "MY_RELATIVES" ],
  "tags": [
    { "words": "EEE FFF CCC", "last": "CCC", "counter": "12" },
    { "words": "EEE FFF",     "last": "FFF", "counter": "12" },
    { "words": "EEE",         "last": "EEE", "counter": "12" }
  ]
}'
# Refresh the index so the documents just written are searchable, then run
# a match_all search that returns only each hit's title as a sanity check.
echo
echo 'refresh'
curl -XPOST 'http://localhost:9200/test-idx/_refresh'
echo
echo 'Dump all documents showing their title.'
curl -XGET 'http://localhost:9200/test-idx/document/_search?pretty=true' -d '{
  "query": {
    "match_all": { }
  },
  "fields": [ "title" ]
}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In response to what seemed like a great example from Igor, I had to build up a larger example. | |
For those wondering what problem this facet solves, I first provide some explanation.
Different top-level documents can each have a series of words, and different docs can have the _same_ set of words. The requirement is to find those with the same set of words (in any order) and the correct "last" word. From those tag records, report only the "best" one. The "counter" field holds that "best" score.
Since we are finding a max, using a facet seemed like it could solve the problem; hence my desire to do a query with a nested query and then a facet on the nested objects.
To solve this problem, my nested objects called "tag" in Igor's example included 4 things. | |
1. words: The list of words in an analyzed field. | |
2. last: The last word in the series. | |
3. counter: A numeric value that I needed to find the max value. | |
4. ID: an un-analyzed field that is just the thing to identify which words we matched. | |
It is 1 & 2 that will be part of nested queries; 3 & 4 part of the facet to get the right values. | |
In the example that follows I simplified it and used the existing un-analyzed field "last" instead of adding an "ID" field, because it didn't seem useful to add another field to the example. The problem was originally with my queries, not with the unusual facet where I needed to find a max. | |
The whole tags structure is really an example of building the right data to answer a question. None of the fields were originally part of the parent document; they were invented to meet the need.
OK, with that out of the way, on with the concrete example.
I have implemented the following examples, but have not carried my knowledge back to my real code, so I can't yet say where I went wrong with my original search request. When I find out, I'll update this gist with some explanation. I believe at least part of my problem was that I also included a "terms" query, which doesn't seem to do what I thought it should (see whatsUpWithNestedTermsQueries.sh below), but there seems to be more to it than that.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Intermediate step: select the outer documents by ACL, put ALL of their
# nested tags into the scope "my_scope" (nested match_all), and run a
# terms_stats facet over that scope, keyed on tags.last with tags.counter
# as the value field.
echo
echo "Now, a step along the way."
echo "1. Match the outer docs"
echo "2. Create a scope of ALL the nested docs within the outer docs"
echo "3. Use the scope in a facet."
echo 'The 3 hits should include only those with a title starting with "accessable" and the facet should count those, so only AAA-FFF'
curl -XGET 'http://localhost:9200/test-idx/document/_search?pretty=true' -d '{
  "query": {
    "bool": {
      "must": [
        { "terms": { "acl": [ "BOB", "MY_RELATIVES", "MY_FRIENDS" ] } },
        {
          "nested": {
            "_scope": "my_scope",
            "path": "tags",
            "score_mode": "avg",
            "query": {
              "match_all": {}
            }
          }
        }
      ]
    }
  },
  "fields": [ "title" ],
  "facets": {
    "test": {
      "terms_stats": {
        "key_field": "tags.last",
        "value_field": "tags.counter",
        "order": "max",
        "size": 2147483647
      },
      "scope": "my_scope"
    }
  }
}'
# Working example: the ACL filter and the nested query both sit in one bool
# "must". The nested query requires a tag whose last is "CCC" and whose
# words contain both "aaa" and "ccc"; only the matching nested tags land in
# "my_scope", which the terms_stats facet then aggregates.
# The banner lines are single-quoted so embedded double quotes print as-is
# (a backslash inside single quotes would be printed literally).
echo
echo 'My working example looks like:'
echo 'Everything in a bool "must" just as Igor had suggested.'
echo '1. Match the outer doc for any overlap in the ACL.'
echo '2. The nested objects also must have both "aaa" and "ccc" in words and "ccc" in the special value.'
echo 'Facet on the scope of the nested types.'
echo 'Results should contain the 1st two accessable docs.'
echo 'The max faceted counter value is in "accessable #1" which has the max counter value of 10'
curl -XGET 'http://localhost:9200/test-idx/document/_search?pretty=true' -d '{
  "query": {
    "bool": {
      "must": [
        { "terms": { "acl": [ "BOB", "MY_RELATIVES", "MY_FRIENDS" ] } },
        {
          "nested": {
            "_scope": "my_scope",
            "path": "tags",
            "score_mode": "max",
            "query": {
              "bool": {
                "must": [
                  { "term": { "tags.last": "CCC" } },
                  { "term": { "tags.words": "aaa" } },
                  { "term": { "tags.words": "ccc" } }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "fields": [ "title" ],
  "facets": {
    "test": {
      "terms_stats": {
        "key_field": "tags.last",
        "value_field": "tags.counter",
        "order": "max",
        "size": 2147483647
      },
      "scope": "my_scope"
    }
  }
}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Now, a step along the way. | |
1. Match the outer docs | |
2. Create a scope of ALL the nested docs within the outer docs | |
3. Use the scope in a facet. | |
The 3 hits should include only those with a title starting with "accessable" and the facet should count those, so only AAA-FFF | |
{ | |
"took" : 1, | |
"timed_out" : false, | |
"_shards" : { | |
"total" : 5, | |
"successful" : 5, | |
"failed" : 0 | |
}, | |
"hits" : { | |
"total" : 3, | |
"max_score" : 0.8857782, | |
"hits" : [ { | |
"_index" : "test-idx", | |
"_type" : "document", | |
"_id" : "2", | |
"_score" : 0.8857782, | |
"fields" : { | |
"title" : "Accessable #2 (MY_FRIENDS) with matching tags" | |
} | |
}, { | |
"_index" : "test-idx", | |
"_type" : "document", | |
"_id" : "1", | |
"_score" : 0.82578707, | |
"fields" : { | |
"title" : "Accessable #1 (by MY_RELATIVES or MY_FRIENDS) with the right tags." | |
} | |
}, { | |
"_index" : "test-idx", | |
"_type" : "document", | |
"_id" : "4", | |
"_score" : 0.40893662, | |
"fields" : { | |
"title" : "Accessable #3 (MY_NEIGHBORS), but not the right tags" | |
} | |
} ] | |
}, | |
"facets" : { | |
"test" : { | |
"_type" : "terms_stats", | |
"missing" : 0, | |
"terms" : [ { | |
"term" : "CCC", | |
"count" : 3, | |
"total_count" : 3, | |
"min" : 8.0, | |
"max" : 12.0, | |
"total" : 30.0, | |
"mean" : 10.0 | |
}, { | |
"term" : "FFF", | |
"count" : 1, | |
"total_count" : 1, | |
"min" : 12.0, | |
"max" : 12.0, | |
"total" : 12.0, | |
"mean" : 12.0 | |
}, { | |
"term" : "EEE", | |
"count" : 1, | |
"total_count" : 1, | |
"min" : 12.0, | |
"max" : 12.0, | |
"total" : 12.0, | |
"mean" : 12.0 | |
}, { | |
"term" : "BBB", | |
"count" : 2, | |
"total_count" : 2, | |
"min" : 8.0, | |
"max" : 10.0, | |
"total" : 18.0, | |
"mean" : 9.0 | |
}, { | |
"term" : "AAA", | |
"count" : 2, | |
"total_count" : 2, | |
"min" : 8.0, | |
"max" : 10.0, | |
"total" : 18.0, | |
"mean" : 9.0 | |
}, { | |
"term" : "DDD", | |
"count" : 1, | |
"total_count" : 1, | |
"min" : 8.0, | |
"max" : 8.0, | |
"total" : 8.0, | |
"mean" : 8.0 | |
} ] | |
} | |
} | |
} | |
My working example looks like: | |
Everything in a bool "must" just as Igor had suggested. | |
1. Match the outer doc for any overlap in the ACL. | |
2. The nested objects also must have both "aaa" and "ccc" in words and "ccc" as the last value. | |
Facet on the scope of the matching nested "tag" type. | |
Results should contain the 1st two accessable docs. | |
The max faceted counter value is in \"accessable #1\" which has the max counter value of 10. | |
{ | |
"took" : 1, | |
"timed_out" : false, | |
"_shards" : { | |
"total" : 5, | |
"successful" : 5, | |
"failed" : 0 | |
}, | |
"hits" : { | |
"total" : 2, | |
"max_score" : 1.8152345, | |
"hits" : [ { | |
"_index" : "test-idx", | |
"_type" : "document", | |
"_id" : "2", | |
"_score" : 1.8152345, | |
"fields" : { | |
"title" : "Accessable #2 (MY_FRIENDS) with matching tags" | |
} | |
}, { | |
"_index" : "test-idx", | |
"_type" : "document", | |
"_id" : "1", | |
"_score" : 1.574821, | |
"fields" : { | |
"title" : "Accessable #1 (by MY_RELATIVES or MY_FRIENDS) with the right tags." | |
} | |
} ] | |
}, | |
"facets" : { | |
"test" : { | |
"_type" : "terms_stats", | |
"missing" : 0, | |
"terms" : [ { | |
"term" : "CCC", | |
"count" : 2, | |
"total_count" : 2, | |
"min" : 8.0, | |
"max" : 10.0, | |
"total" : 18.0, | |
"mean" : 9.0 | |
} ] | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Open question: same search as the working example, except the two "term"
# clauses on tags.words are replaced by a single "terms" clause with
# "execution": "and". The banner below records the observed problem: the
# terms clause does not restrict the hits, so the facet max comes out as 12.
# The banner text is single-quoted so "one?" is not treated as a glob and
# the double quotes around "terms" are actually printed.
echo 'But what is up with this next one? I would have thought that the two term queries on the same field in the nested query'
echo 'could be replaced with a termS query with some appropriate "execution" value.'
echo 'but putting in good data does NOT effect the query.'
echo 'Notice how the terms in tag.words does NOT limit the hits, so we end up faceting on a 3rd accessible doc that does NOT have "aaa" or "ccc" in its words.'
echo 'Thus getting an incorrect max result of 12 in the facet.'
echo 'I think it has something to do with "terms" is really using a filter and somehow that does not affect either the outer hits or the facet.'
curl -XGET 'http://localhost:9200/test-idx/document/_search?pretty=true' -d '{
  "query": {
    "bool": {
      "must": [
        { "terms": { "acl": [ "BOB", "MY_RELATIVES", "MY_FRIENDS" ] } },
        {
          "nested": {
            "_scope": "my_scope",
            "path": "tags",
            "score_mode": "max",
            "query": {
              "bool": {
                "must": [
                  { "term": { "tags.last": "CCC" } },
                  { "terms": { "tags.words": [ "aaa", "ccc" ], "execution": "and" } }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "fields": [ "title" ],
  "facets": {
    "test": {
      "terms_stats": {
        "key_field": "tags.last",
        "value_field": "tags.counter",
        "order": "max",
        "size": 2147483647
      },
      "scope": "my_scope"
    }
  }
}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment