Last active
December 20, 2015 14:09
-
-
Save brian-from-fl/6144031 to your computer and use it in GitHub Desktop.
ElasticSearch facets with combination hierarchies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
If a terms facet specifies "fields" : [ "field1", "field2", ... ]
instead of just "field" : "field_name", then the individual counts for
each field are grouped under that one facet's name. Otherwise, no
combining or grouping takes place: for instance, there is no count for
the combination (married and female). Producing such combination counts
requires a script plus some form of post-processing.
My inspiration for the solution came | |
from http://elasticsearch-users.115913.n3.nabble.com/facet-and-grouping-td4020055.html | |
(specifically, Sujoy's comment). | |
While Sujoy used JavaScript for the post-processing, I just used Java as | |
that's how the rest of my query and response handling is done. When | |
receiving the response, my Java code didn't have too much work to do. | |
Using a LinkedHashMap allowed fast lookups (for example, finding a state
in order to add a city word to it) and also ensured that the order of the
terms as returned by ElasticSearch was preserved exactly.
This gist contains two files, Query and Response.
The Query contains the script. | |
The Response contains the complete response, along with the JSON that my | |
Java code added. Here are some snippets from the Response: | |
1. Under "facets", inside "state_city_combinations", I synthesized
"combinations", which holds the resulting state -> city-word hierarchy
built from the "state~~~city" terms.
2. Then under "terms" I kept the original response from ElasticSearch for
verification purposes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "from" : 0,
  "size" : 50000,
  "query" : {
    "match_all" : { }
  },
  "version" : true,
  "explain" : false,
  "fields" : [ "_ttl", "_source" ],
  "facets" : {
    "state_city_combinations" : {
      "terms" : {
        "size" : 100,
        "script" : "doc['state'].value + \"~~~\" + doc['city'].value"
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"facets" : { | |
"state_city_combinations" : { | |
"_type" : "terms", | |
"total" : 25376, | |
"other" : 24313, | |
"missing" : 0, | |
"combinations" : { | |
"fl" : { | |
"beach" : 61, | |
"lake" : 23, | |
"citi" : 21, | |
"estat" : 10, | |
"fort" : 7, | |
"island" : 6, | |
"bay" : 6 | |
}, | |
"tx" : { | |
"citi" : 36, | |
"creek" : 10, | |
"oak" : 9, | |
"la" : 9, | |
"hill" : 7, | |
"park" : 6, | |
"grove" : 6, | |
"cross" : 5, | |
"acr" : 5, | |
"lake" : 4, | |
"estat" : 4, | |
"canyon" : 4, | |
"alto" : 4 | |
}, | |
"mo" : { | |
"citi" : 36 | |
}, | |
"il" : { | |
"citi" : 29, | |
"lake" : 13, | |
"grove" : 11, | |
"hill" : 7 | |
}, | |
"mn" : { | |
"lake" : 27, | |
"fall" : 7 | |
}, | |
"wi" : { | |
"lake" : 25, | |
"fall" : 6, | |
"citi" : 6, | |
"bay" : 6 | |
}, | |
"ny" : { | |
"east" : 25, | |
"fall" : 18, | |
"lake" : 13, | |
"beach" : 7, | |
"harbor" : 6, | |
"bay" : 6, | |
"point" : 5, | |
"north" : 5, | |
"hill" : 5, | |
"height" : 5 | |
}, | |
"ia" : { | |
"citi" : 23 | |
}, | |
"ca" : { | |
"citi" : 22, | |
"beach" : 20, | |
"hill" : 17, | |
"east" : 9, | |
"lake" : 8, | |
"el" : 7 | |
}, | |
"pa" : { | |
"citi" : 19, | |
"east" : 17, | |
"hill" : 15, | |
"height" : 11, | |
"new" : 7, | |
"mount" : 5, | |
"grove" : 5, | |
"glen" : 4, | |
"beaver" : 4 | |
}, | |
"ok" : { | |
"citi" : 18, | |
"grove" : 6, | |
"creek" : 6 | |
}, | |
"oh" : { | |
"citi" : 17, | |
"hill" : 14, | |
"height" : 14, | |
"new" : 9, | |
"center" : 9, | |
"north" : 7, | |
"lake" : 6, | |
"mount" : 5 | |
}, | |
"wa" : { | |
"lake" : 16, | |
"citi" : 6, | |
"east" : 4, | |
"creek" : 4 | |
}, | |
"or" : { | |
"citi" : 14, | |
"bay" : 4 | |
}, | |
"mi" : { | |
"lake" : 13, | |
"citi" : 11, | |
"beach" : 6 | |
}, | |
"ks" : { | |
"citi" : 13 | |
}, | |
"in" : { | |
"citi" : 13, | |
"lake" : 5 | |
}, | |
"ak" : { | |
"bay" : 13 | |
}, | |
"tn" : { | |
"citi" : 10, | |
"hill" : 6 | |
}, | |
"nj" : { | |
"lake" : 9, | |
"citi" : 9, | |
"beach" : 8 | |
}, | |
"ne" : { | |
"citi" : 9 | |
}, | |
"nc" : { | |
"citi" : 9, | |
"beach" : 7, | |
"lake" : 5 | |
}, | |
"ky" : { | |
"hill" : 7 | |
}, | |
"ga" : { | |
"citi" : 7 | |
}, | |
"ut" : { | |
"citi" : 6 | |
}, | |
"sd" : { | |
"citi" : 6, | |
"lake" : 4 | |
}, | |
"ct" : { | |
"center" : 6 | |
}, | |
"al" : { | |
"citi" : 6 | |
}, | |
"va" : { | |
"hill" : 4, | |
"citi" : 4 | |
}, | |
"pr" : { | |
"las" : 4 | |
} | |
}, | |
"terms" : [ { | |
"term" : "fl~~~beach", | |
"count" : 61 | |
}, { | |
"term" : "tx~~~citi", | |
"count" : 36 | |
}, { | |
"term" : "mo~~~citi", | |
"count" : 36 | |
}, { | |
"term" : "il~~~citi", | |
"count" : 29 | |
}, { | |
"term" : "mn~~~lake", | |
"count" : 27 | |
}, { | |
"term" : "wi~~~lake", | |
"count" : 25 | |
}, { | |
"term" : "ny~~~east", | |
"count" : 25 | |
}, { | |
"term" : "ia~~~citi", | |
"count" : 23 | |
}, { | |
"term" : "fl~~~lake", | |
"count" : 23 | |
}, { | |
"term" : "ca~~~citi", | |
"count" : 22 | |
}, { | |
"term" : "fl~~~citi", | |
"count" : 21 | |
}, { | |
"term" : "ca~~~beach", | |
"count" : 20 | |
}, { | |
"term" : "pa~~~citi", | |
"count" : 19 | |
}, { | |
"term" : "ok~~~citi", | |
"count" : 18 | |
}, { | |
"term" : "ny~~~fall", | |
"count" : 18 | |
}, { | |
"term" : "pa~~~east", | |
"count" : 17 | |
}, { | |
"term" : "oh~~~citi", | |
"count" : 17 | |
}, { | |
"term" : "ca~~~hill", | |
"count" : 17 | |
}, { | |
"term" : "wa~~~lake", | |
"count" : 16 | |
}, { | |
"term" : "pa~~~hill", | |
"count" : 15 | |
}, { | |
"term" : "or~~~citi", | |
"count" : 14 | |
}, { | |
"term" : "oh~~~hill", | |
"count" : 14 | |
}, { | |
"term" : "oh~~~height", | |
"count" : 14 | |
}, { | |
"term" : "ny~~~lake", | |
"count" : 13 | |
}, { | |
"term" : "mi~~~lake", | |
"count" : 13 | |
}, { | |
"term" : "ks~~~citi", | |
"count" : 13 | |
}, { | |
"term" : "in~~~citi", | |
"count" : 13 | |
}, { | |
"term" : "il~~~lake", | |
"count" : 13 | |
}, { | |
"term" : "ak~~~bay", | |
"count" : 13 | |
}, { | |
"term" : "pa~~~height", | |
"count" : 11 | |
}, { | |
"term" : "mi~~~citi", | |
"count" : 11 | |
}, { | |
"term" : "il~~~grove", | |
"count" : 11 | |
}, { | |
"term" : "tx~~~creek", | |
"count" : 10 | |
}, { | |
"term" : "tn~~~citi", | |
"count" : 10 | |
}, { | |
"term" : "fl~~~estat", | |
"count" : 10 | |
}, { | |
"term" : "tx~~~oak", | |
"count" : 9 | |
}, { | |
"term" : "tx~~~la", | |
"count" : 9 | |
}, { | |
"term" : "oh~~~new", | |
"count" : 9 | |
}, { | |
"term" : "oh~~~center", | |
"count" : 9 | |
}, { | |
"term" : "nj~~~lake", | |
"count" : 9 | |
}, { | |
"term" : "nj~~~citi", | |
"count" : 9 | |
}, { | |
"term" : "ne~~~citi", | |
"count" : 9 | |
}, { | |
"term" : "nc~~~citi", | |
"count" : 9 | |
}, { | |
"term" : "ca~~~east", | |
"count" : 9 | |
}, { | |
"term" : "nj~~~beach", | |
"count" : 8 | |
}, { | |
"term" : "ca~~~lake", | |
"count" : 8 | |
}, { | |
"term" : "tx~~~hill", | |
"count" : 7 | |
}, { | |
"term" : "pa~~~new", | |
"count" : 7 | |
}, { | |
"term" : "oh~~~north", | |
"count" : 7 | |
}, { | |
"term" : "ny~~~beach", | |
"count" : 7 | |
}, { | |
"term" : "nc~~~beach", | |
"count" : 7 | |
}, { | |
"term" : "mn~~~fall", | |
"count" : 7 | |
}, { | |
"term" : "ky~~~hill", | |
"count" : 7 | |
}, { | |
"term" : "il~~~hill", | |
"count" : 7 | |
}, { | |
"term" : "ga~~~citi", | |
"count" : 7 | |
}, { | |
"term" : "fl~~~fort", | |
"count" : 7 | |
}, { | |
"term" : "ca~~~el", | |
"count" : 7 | |
}, { | |
"term" : "wi~~~fall", | |
"count" : 6 | |
}, { | |
"term" : "wi~~~citi", | |
"count" : 6 | |
}, { | |
"term" : "wi~~~bay", | |
"count" : 6 | |
}, { | |
"term" : "wa~~~citi", | |
"count" : 6 | |
}, { | |
"term" : "ut~~~citi", | |
"count" : 6 | |
}, { | |
"term" : "tx~~~park", | |
"count" : 6 | |
}, { | |
"term" : "tx~~~grove", | |
"count" : 6 | |
}, { | |
"term" : "tn~~~hill", | |
"count" : 6 | |
}, { | |
"term" : "sd~~~citi", | |
"count" : 6 | |
}, { | |
"term" : "ok~~~grove", | |
"count" : 6 | |
}, { | |
"term" : "ok~~~creek", | |
"count" : 6 | |
}, { | |
"term" : "oh~~~lake", | |
"count" : 6 | |
}, { | |
"term" : "ny~~~harbor", | |
"count" : 6 | |
}, { | |
"term" : "ny~~~bay", | |
"count" : 6 | |
}, { | |
"term" : "mi~~~beach", | |
"count" : 6 | |
}, { | |
"term" : "fl~~~island", | |
"count" : 6 | |
}, { | |
"term" : "fl~~~bay", | |
"count" : 6 | |
}, { | |
"term" : "ct~~~center", | |
"count" : 6 | |
}, { | |
"term" : "al~~~citi", | |
"count" : 6 | |
}, { | |
"term" : "tx~~~cross", | |
"count" : 5 | |
}, { | |
"term" : "tx~~~acr", | |
"count" : 5 | |
}, { | |
"term" : "pa~~~mount", | |
"count" : 5 | |
}, { | |
"term" : "pa~~~grove", | |
"count" : 5 | |
}, { | |
"term" : "oh~~~mount", | |
"count" : 5 | |
}, { | |
"term" : "ny~~~point", | |
"count" : 5 | |
}, { | |
"term" : "ny~~~north", | |
"count" : 5 | |
}, { | |
"term" : "ny~~~hill", | |
"count" : 5 | |
}, { | |
"term" : "ny~~~height", | |
"count" : 5 | |
}, { | |
"term" : "nc~~~lake", | |
"count" : 5 | |
}, { | |
"term" : "in~~~lake", | |
"count" : 5 | |
}, { | |
"term" : "wa~~~east", | |
"count" : 4 | |
}, { | |
"term" : "wa~~~creek", | |
"count" : 4 | |
}, { | |
"term" : "va~~~hill", | |
"count" : 4 | |
}, { | |
"term" : "va~~~citi", | |
"count" : 4 | |
}, { | |
"term" : "tx~~~lake", | |
"count" : 4 | |
}, { | |
"term" : "tx~~~estat", | |
"count" : 4 | |
}, { | |
"term" : "tx~~~canyon", | |
"count" : 4 | |
}, { | |
"term" : "tx~~~alto", | |
"count" : 4 | |
}, { | |
"term" : "sd~~~lake", | |
"count" : 4 | |
}, { | |
"term" : "pr~~~las", | |
"count" : 4 | |
}, { | |
"term" : "pa~~~glen", | |
"count" : 4 | |
}, { | |
"term" : "pa~~~beaver", | |
"count" : 4 | |
}, { | |
"term" : "or~~~bay", | |
"count" : 4 | |
} ] | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment