Ineffective token_count example: a doc with two terms comes back from a terms_set query where only one term is supplied
# Create the index. securityTags is a keyword field with a token_count
# sub-field (securityTags.length) that uses the standard analyzer.
PUT my_index
{
  "mappings": {
    "doc": {
      "properties": {
        "securityTags": {
          "type": "keyword",
          "fields": {
            "length": {
              "type": "token_count",
              "analyzer": "standard"
            }
          }
        }
      }
    }
  }
}
# Index a doc that carries two single-token security tags.
POST /my_index/doc
{
  "sensitiveData": "Dave Erickson lives in Wahshington DC",
  "securityTags": ["DaveLocation", "DaveFullName"]
}
# Expected: no hits, since the doc has two securityTags and only one term is
# supplied here. The number of required matches is read from securityTags.length.
GET my_index/_search
{
  "query": {
    "terms_set": {
      "securityTags": {
        "terms": ["DaveLocation"],
        "minimum_should_match_field": "securityTags.length"
      }
    }
  }
}
# This demonstrates that token_count is not counting the total number of tokens
# in the securityTags list: search for docs whose securityTags.length is 1.
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 1
      }
    }
  }
}
# Check whether securityTags.length holds a single value per doc by indexing a
# new doc that also contains a two-token tag ("two tokens").
POST /my_index/doc
{
  "sensitiveData": "Dave Erickson lives in Wahshington DC",
  "securityTags": ["DaveLocation", "DaveFullName", "two tokens"]
}
# Searching for securityTags.length = 1 again: it looks like we get all the docs.
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 1
      }
    }
  }
}
# ...and a length value is stored for each securityTags entry: the "fields"
# sub-field ends up holding one count per tag rather than one per doc, with all
# the usual Lucene-flattening consequences. Search for securityTags.length = 2.
GET my_index/_search
{
  "query": {
    "term": {
      "securityTags.length": {
        "value": 2
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment