In my sport index, I have the following documents indexed as football_team type:
POST sport/football_team
{
"name": "Real Madrid",
"defense_strength": 87.4,
"middle_strength": 90.34,
"forward_strength": 98.34,
"player_ids": [
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"
]
}
POST sport/football_team
{
"name": "Real Madrid",
"defense_strength": 88.2,
"middle_strength": 92.34,
"forward_strength": 97.45,
"player_ids": [
"1", "2", "3", "4", "21", "6", "7", "8", "9", "10", "11"
]
}
POST sport/football_team
{
"name": "Real Madrid",
"defense_strength": 82.1,
"middle_strength": 91.23,
"forward_strength": 91.34,
"player_ids": [
"1", "19", "3", "4", "7", "6", "7", "8", "9", "10", "11"
]
}
POST sport/football_team
{
"name": "Barcelona",
"defense_strength": 76.34,
"middle_strength": 93.23,
"forward_strength": 96.44,
"player_ids": [
"1", "78", "32", "4", "7", "23", "7", "18", "9", "10", "66"
]
}
POST sport/football_team
{
"name": "Barcelona",
"defense_strength": 79.34,
"middle_strength": 92.43,
"forward_strength": 98.32,
"player_ids": [
"1", "78", "98", "4", "7", "23", "7", "18", "9", "10", "66"
]
}
POST sport/football_team
{
"name": "Arsenal",
"defense_strength": 81.23,
"middle_strength": 89.33,
"forward_strength": 92.34,
"player_ids": [
"1", "34", "44", "4", "7", "90", "7", "18", "9", "10", "34"
]
}
POST sport/football_team
{
"name": "Arsenal",
"defense_strength": 82.34,
"middle_strength": 88.34,
"forward_strength": 94.3,
"player_ids": [
"1", "34", "44", "4", "7", "90", "7", "18", "9", "55", "34"
]
}
Here, each football team has a name and some strength values. Besides that, there is a player_ids collection for each team. The team stregth has been calculated by taking the avarage of players' strengths during the ETL process. You can also see that there are multiple football teams with the same name here but the player_ids collection is different.
When we run the following query:
POST sport/_search
{
"sort": [
{
"forward_strength": {
"order": "desc"
}
}
],
"filter": {
"bool": {
"must": [
{
"range": {
"defense_strength": {
"lte": 83.43
}
}
},
{
"range": {
"forward_strength": {
"gte": 91
}
}
}
]
}
}
}
We will get the following result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": null,
"hits": [
{
"_index": "sport",
"_type": "football_team",
"_id": "HfztrM35SNCgedZ651ZYpQ",
"_score": null,
"_source": {
"name": "Barcelona",
"defense_strength": 79.34,
"middle_strength": 92.43,
"forward_strength": 98.32,
"player_ids": [
"1",
"78",
"98",
"4",
"7",
"23",
"7",
"18",
"9",
"10",
"66"
]
},
"sort": [
98.32
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "q4WM6L89SMm7pAmidO_tOQ",
"_score": null,
"_source": {
"name": "Barcelona",
"defense_strength": 76.34,
"middle_strength": 93.23,
"forward_strength": 96.44,
"player_ids": [
"1",
"78",
"32",
"4",
"7",
"23",
"7",
"18",
"9",
"10",
"66"
]
},
"sort": [
96.44
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "nJbKUBY7RNGqCS2OmMU0jw",
"_score": null,
"_source": {
"name": "Arsenal",
"defense_strength": 82.34,
"middle_strength": 88.34,
"forward_strength": 94.3,
"player_ids": [
"1",
"34",
"44",
"4",
"7",
"90",
"7",
"18",
"9",
"55",
"34"
]
},
"sort": [
94.3
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "yqDdRUGKRLKAhRyjqj-uPw",
"_score": null,
"_source": {
"name": "Arsenal",
"defense_strength": 81.23,
"middle_strength": 89.33,
"forward_strength": 92.34,
"player_ids": [
"1",
"34",
"44",
"4",
"7",
"90",
"7",
"18",
"9",
"10",
"34"
]
},
"sort": [
92.34
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "7wap_Ol-SrG5MnnpL7DYIw",
"_score": null,
"_source": {
"name": "Real Madrid",
"defense_strength": 82.1,
"middle_strength": 91.23,
"forward_strength": 91.34,
"player_ids": [
"1",
"19",
"3",
"4",
"7",
"6",
"7",
"8",
"9",
"10",
"11"
]
},
"sort": [
91.34
]
}
]
}
}
Which is expected. However, what I would like to get here is top 1 row of each group (grouped by the team name). The result I would like to get for the above query is this:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "sport",
"_type": "football_team",
"_id": "HfztrM35SNCgedZ651ZYpQ",
"_score": null,
"_source": {
"name": "Barcelona",
"defense_strength": 79.34,
"middle_strength": 92.43,
"forward_strength": 98.32,
"player_ids": [
"1",
"78",
"98",
"4",
"7",
"23",
"7",
"18",
"9",
"10",
"66"
]
},
"sort": [
98.32
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "nJbKUBY7RNGqCS2OmMU0jw",
"_score": null,
"_source": {
"name": "Arsenal",
"defense_strength": 82.34,
"middle_strength": 88.34,
"forward_strength": 94.3,
"player_ids": [
"1",
"34",
"44",
"4",
"7",
"90",
"7",
"18",
"9",
"55",
"34"
]
},
"sort": [
94.3
]
},
{
"_index": "sport",
"_type": "football_team",
"_id": "7wap_Ol-SrG5MnnpL7DYIw",
"_score": null,
"_source": {
"name": "Real Madrid",
"defense_strength": 82.1,
"middle_strength": 91.23,
"forward_strength": 91.34,
"player_ids": [
"1",
"19",
"3",
"4",
"7",
"6",
"7",
"8",
"9",
"10",
"11"
]
},
"sort": [
91.34
]
}
]
}
}
Note: I can do this at the client side if I have this small amount of data but in my case, I have millions of records like this and I cam doing pagination as well.