Create an index with `arr_1` as an ordinary analyzed field, and `arr_2` as the same kind of field but with a `position_offset_gap` of 100:
curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1' -d '
{
"mappings" : {
"test" : {
"properties" : {
"arr_2" : {
"position_offset_gap" : 100,
"type" : "string"
},
"arr_1" : {
"type" : "string"
}
}
}
}
}
'
Index two identical arrays:
curl -XPOST 'http://127.0.0.1:9200/test/test?pretty=1' -d '
{
"arr_2" : [
"foo",
"bar"
],
"arr_1" : [
"foo",
"bar"
]
}
'
curl -XPOST 'http://127.0.0.1:9200/test/_refresh?pretty=1'
A phrase search on `arr_1` works:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_1" : {
"query" : "foo bar"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "arr_2" : [
# "foo",
# "bar"
# ],
# "arr_1" : [
# "foo",
# "bar"
# ]
# },
# "_score" : 0.38356602,
# "_index" : "test",
# "_id" : "FuKpWmm9SR6lE97GOsL84Q",
# "_type" : "test"
# }
# ],
# "max_score" : 0.38356602,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 4
# }
A phrase search on `arr_1` with the words in the wrong order fails:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_1" : {
"query" : "bar foo"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [],
# "max_score" : null,
# "total" : 0
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 3
# }
Unless we increase the `slop`:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_1" : {
"query" : "bar foo",
"slop" : 2
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "arr_2" : [
# "foo",
# "bar"
# ],
# "arr_1" : [
# "foo",
# "bar"
# ]
# },
# "_score" : 0.22145195,
# "_index" : "test",
# "_id" : "FuKpWmm9SR6lE97GOsL84Q",
# "_type" : "test"
# }
# ],
# "max_score" : 0.22145195,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 2
# }
The same phrase query on `arr_2` fails:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_2" : {
"query" : "foo bar"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [],
# "max_score" : null,
# "total" : 0
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 4
# }
Even with a `slop` of 2:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_2" : {
"query" : "foo bar",
"slop" : 2
}
}
}
}
'
# {
# "hits" : {
# "hits" : [],
# "max_score" : null,
# "total" : 0
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 2
# }
Unless we increase the `slop` enough to take the `position_offset_gap` into account:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match_phrase" : {
"arr_2" : {
"query" : "foo bar",
"slop" : 100
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "arr_2" : [
# "foo",
# "bar"
# ],
# "arr_1" : [
# "foo",
# "bar"
# ]
# },
# "_score" : 0.038166247,
# "_index" : "test",
# "_id" : "FuKpWmm9SR6lE97GOsL84Q",
# "_type" : "test"
# }
# ],
# "max_score" : 0.038166247,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 3
# }