Skip to content

Instantly share code, notes, and snippets.

@dptesta
Last active August 29, 2015 13:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dptesta/10688636 to your computer and use it in GitHub Desktop.
Save dptesta/10688636 to your computer and use it in GitHub Desktop.
ElasticSearch: Filtering aggregations on nested types
# Demo 1 (index "nested_aggs"): filter aggregation inside a nested aggregation.
# Recreate the index from scratch so the script can be re-run.
curl -XDELETE localhost:9200/nested_aggs
curl -XPUT localhost:9200/nested_aggs

# Map "groups" as a nested type. Every string field is not_analyzed so term
# filters and terms aggregations operate on the exact values.
curl -XPUT localhost:9200/nested_aggs/user/_mapping -d '
{
  "_id": {"index": "not_analyzed", "path": "userId"},
  "properties": {
    "userId": {"type": "string", "index": "not_analyzed"},
    "groups": {
      "type": "nested",
      "properties": {
        "groupId": {"type": "string", "index": "not_analyzed"},
        "groupRole": {"type": "string", "index": "not_analyzed"},
        "groupAdmin": {"type": "string", "index": "not_analyzed"}
      }
    }
  }
}
'

# Index one user with four groups: two administered by User2 (both Role1) and
# two administered by User3 (Role1 and Role2).
# refresh=true makes the document searchable before the search below runs;
# the URL is quoted so the shell does not treat "?" as a glob character.
curl -XPUT 'localhost:9200/nested_aggs/user/User1?refresh=true' -d '
{
  "userId": "User1",
  "groups": [
    {
      "groupId": "Group1",
      "groupRole": "Role1",
      "groupAdmin": "User2"
    },
    {
      "groupId": "Group2",
      "groupRole": "Role1",
      "groupAdmin": "User2"
    },
    {
      "groupId": "Group3",
      "groupRole": "Role1",
      "groupAdmin": "User3"
    },
    {
      "groupId": "Group4",
      "groupRole": "Role2",
      "groupAdmin": "User3"
    }
  ]
}
'

# Query: keep users that have at least one group administered by User2.
# Aggregation: step into the nested "groups" docs, keep those administered by
# User2, and bucket them by role. Nested fields are referenced by their full
# path (groups.*) in both the filter and the terms aggregation.
curl localhost:9200/nested_aggs/_search -d '
{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "nested": {
          "path": "groups",
          "query": {
            "filtered": {
              "query": {
                "match_all": {}
              },
              "filter": {
                "bool": {
                  "must": [
                    {
                      "term": {
                        "groups.groupAdmin": "User2"
                      }
                    }
                  ]
                }
              }
            }
          }
        }
      }
    }
  },
  "aggs": {
    "nested_groups": {
      "nested": {
        "path": "groups"
      },
      "aggs": {
        "admin_groups": {
          "filter": {
            "bool": {
              "must": [
                {
                  "term": {
                    "groups.groupAdmin": "User2"
                  }
                }
              ]
            }
          },
          "aggs": {
            "groupRoles": {
              "terms": {
                "field": "groups.groupRole"
              }
            }
          }
        }
      }
    }
  }
}
'
// Result: doc_count is counted at the nested level, not at the parent level (Role1 = 2 although only one user matches).
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"nested_groups" : {
"doc_count" : 4,
"admin_groups" : {
"doc_count" : 2,
"groupRoles" : {
"buckets" : [ {
"key" : "Role1",
"doc_count" : 2
} ]
}
}
}
}
}
# Demo 2 (index "nested_include"): same data, but the nested type is mapped
# with include_in_parent and the terms aggregation runs at the parent level.
# Recreate the index from scratch so the script can be re-run.
curl -XDELETE localhost:9200/nested_include
curl -XPUT localhost:9200/nested_include

# Same mapping as the nested_aggs demo, plus "include_in_parent": true on "groups".
curl -XPUT localhost:9200/nested_include/user/_mapping -d '
{
  "_id": {"index": "not_analyzed", "path": "userId"},
  "properties": {
    "userId": {"type": "string", "index": "not_analyzed"},
    "groups": {
      "type": "nested",
      "include_in_parent": true,
      "properties": {
        "groupId": {"type": "string", "index": "not_analyzed"},
        "groupRole": {"type": "string", "index": "not_analyzed"},
        "groupAdmin": {"type": "string", "index": "not_analyzed"}
      }
    }
  }
}
'

# Index one user with four groups: two administered by User2 (both Role1) and
# two administered by User3 (Role1 and Role2).
# refresh=true makes the document searchable before the search below runs;
# the URL is quoted so the shell does not treat "?" as a glob character.
curl -XPUT 'localhost:9200/nested_include/user/User1?refresh=true' -d '
{
  "userId": "User1",
  "groups": [
    {
      "groupId": "Group1",
      "groupRole": "Role1",
      "groupAdmin": "User2"
    },
    {
      "groupId": "Group2",
      "groupRole": "Role1",
      "groupAdmin": "User2"
    },
    {
      "groupId": "Group3",
      "groupRole": "Role1",
      "groupAdmin": "User3"
    },
    {
      "groupId": "Group4",
      "groupRole": "Role2",
      "groupAdmin": "User3"
    }
  ]
}
'

# Query: keep users that have at least one group administered by User2.
# Aggregation: a plain terms aggregation on groups.groupRole with no nested
# wrapper, so no per-group filter is applied to the buckets.
curl localhost:9200/nested_include/_search -d '
{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "nested": {
          "path": "groups",
          "query": {
            "filtered": {
              "query": {
                "match_all": {}
              },
              "filter": {
                "bool": {
                  "must": [
                    {
                      "term": {
                        "groups.groupAdmin": "User2"
                      }
                    }
                  ]
                }
              }
            }
          }
        }
      }
    }
  },
  "aggs": {
    "roles": {
      "terms": {
        "field": "groups.groupRole"
      }
    }
  }
}
'
# Demo 3 (index "nested_include_key"): workaround using a combined key field.
# Each group carries "roleAdminKey" ("<groupRole>-<groupAdmin>"), which is
# aggregated at the parent level and filtered with the terms "include" regex.
# Recreate the index from scratch so the script can be re-run.
curl -XDELETE localhost:9200/nested_include_key
curl -XPUT localhost:9200/nested_include_key

# Same mapping as the nested_include demo, plus the not_analyzed "roleAdminKey" field.
curl -XPUT localhost:9200/nested_include_key/user/_mapping -d '
{
  "_id": {"index": "not_analyzed", "path": "userId"},
  "properties": {
    "userId": {"type": "string", "index": "not_analyzed"},
    "groups": {
      "type": "nested",
      "include_in_parent": true,
      "properties": {
        "groupId": {"type": "string", "index": "not_analyzed"},
        "groupRole": {"type": "string", "index": "not_analyzed"},
        "groupAdmin": {"type": "string", "index": "not_analyzed"},
        "roleAdminKey": {"type": "string", "index": "not_analyzed"}
      }
    }
  }
}
'

# Index the same user and groups as the other demos, with roleAdminKey filled in.
# refresh=true makes the document searchable before the search below runs;
# the URL is quoted so the shell does not treat "?" as a glob character.
curl -XPUT 'localhost:9200/nested_include_key/user/User1?refresh=true' -d '
{
  "userId": "User1",
  "groups": [
    {
      "groupId": "Group1",
      "groupRole": "Role1",
      "groupAdmin": "User2",
      "roleAdminKey": "Role1-User2"
    },
    {
      "groupId": "Group2",
      "groupRole": "Role1",
      "groupAdmin": "User2",
      "roleAdminKey": "Role1-User2"
    },
    {
      "groupId": "Group3",
      "groupRole": "Role1",
      "groupAdmin": "User3",
      "roleAdminKey": "Role1-User3"
    },
    {
      "groupId": "Group4",
      "groupRole": "Role2",
      "groupAdmin": "User3",
      "roleAdminKey": "Role2-User3"
    }
  ]
}
'

# Query: keep users that have at least one group administered by User2.
# Aggregation: terms on groups.roleAdminKey, restricted by "include" to keys
# that contain "User2".
curl localhost:9200/nested_include_key/_search -d '
{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "nested": {
          "path": "groups",
          "query": {
            "filtered": {
              "query": {
                "match_all": {}
              },
              "filter": {
                "bool": {
                  "must": [
                    {
                      "term": {
                        "groups.groupAdmin": "User2"
                      }
                    }
                  ]
                }
              }
            }
          }
        }
      }
    }
  },
  "aggs": {
    "roles": {
      "terms": {
        "field": "groups.roleAdminKey",
        "include": ".*User2.*"
      }
    }
  }
}
'
// Workaround: add "*Key" fields to the docs and aggregate on them at the parent level.
// The filter is applied through a regular expression in the "include" parameter instead.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"roles" : {
"buckets" : [ {
"key" : "Role1-User2",
"doc_count" : 1
} ]
}
}
}
// Counts at the parent level, but the nested filter cannot be applied to the aggregation (Role2 is still counted).
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"roles" : {
"buckets" : [ {
"key" : "Role1",
"doc_count" : 1
}, {
"key" : "Role2",
"doc_count" : 1
} ]
}
}
}
@dptesta
Copy link
Author

dptesta commented Apr 15, 2014

I am trying to use aggregations on a nested type. I want to apply a nested filter so that the aggs run on a subset of docs, but when I do that the "terms" aggregation counts nested docs, not parent docs. In other words, if the same term is found in two different nested documents belonging to the same parent, the count will be two. I want it to count only once per parent document. Using "include_in_parent" does not help because then I cannot apply the nested filter.

I can work around this by inserting my filter into "_Key" fields, but my actual mapping contains multiple levels of nested documents and I want to be able to apply all kinds of filtered nested aggregations without having to create numerous "_Key" fields.

Is there a way to achieve this?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment