Created
May 28, 2021 22:34
-
-
Save FrankHassanabad/e15dc17b7b63982eabd185bd8be903d9 to your computer and use it in GitHub Desktop.
Slow queries using runtime fields to split against "data_stream.dataset" to create "event.module"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create an index with a runtime field that splits the "constant_keyword" field | |
# "data_stream.dataset" and emits the first piece as "event.module". | |
# A "host.name" field of type "keyword" is added for comparison. | |
# * Profile of aggregating against the runtime field which does a split against the "constant_keyword" | |
# * Profile of aggregating against the "constant_keyword" | |
# * Profile of aggregating against a normal keyword | |
# * Profile of aggregating against a field alias that points to the "constant_keyword" field | |
# Start from a clean slate: drop any earlier copy of the test index. | |
DELETE const-logs-frank-delme-1 | |
# Create the test index with explicit mappings only ("dynamic": "false"): fields of the | |
# re-indexed documents that are not declared below stay in _source but are not indexed. | |
# * "event.module" is a runtime keyword whose script splits "data_stream.dataset" on "." | |
#   and emits the first piece ("nginx" for the constant value "nginx.access"). | |
# * "event_alias_test.dataset" is a field alias pointing at "data_stream.dataset". | |
PUT const-logs-frank-delme-1 | |
{ | |
"mappings": { | |
"dynamic": "false", | |
"runtime": { | |
"event.module": { | |
"type": "keyword", | |
"script": { | |
"source": """ | |
def split_module=doc['data_stream.dataset'].value.splitOnToken('.'); | |
emit(split_module[0]) | |
""" | |
} | |
} | |
}, | |
"properties": { | |
"@timestamp": { | |
"type": "date" | |
}, | |
"message": { | |
"type": "text" | |
}, | |
"host": { | |
"properties": { | |
"name": { | |
"type": "keyword" | |
} | |
} | |
}, | |
"data_stream": { | |
"properties": { | |
"dataset": { | |
"type": "constant_keyword", | |
"value": "nginx.access" | |
} | |
} | |
}, | |
"event_alias_test": { | |
"properties": { | |
"dataset": { | |
"type": "alias", | |
"path": "data_stream.dataset" | |
} | |
} | |
} | |
} | |
} | |
} | |
# Re-index two auditbeat indices from estc to get close to 1 million documents for the profile runs. | |
# wait_for_completion=false starts the reindex as a background task and returns a task id | |
# instead of blocking until the copy has finished. | |
POST _reindex?wait_for_completion=false | |
{ | |
"source": { | |
"index": "auditbeat-8.0.0-2021.04.26-000007,auditbeat-8.0.0-2021.05.26-000008" | |
}, | |
"dest": { | |
"index": "const-logs-frank-delme-1" | |
} | |
} | |
# The count should be around 941,637 once indexing is complete. | |
GET const-logs-frank-delme-1/_count | |
# Profile run against the runtime field "event.module" (the scripted split of "data_stream.dataset"). | |
# "size": 0 returns no hits, and request_cache=false bypasses the shard request cache for this request. | |
GET const-logs-frank-delme-1/_search?request_cache=false&human=true | |
{ | |
"size": 0, | |
"profile": true, | |
"track_total_hits": false, | |
"aggs": { | |
"event_module": { | |
"terms": { | |
"field": "event.module" | |
} | |
} | |
} | |
} | |
# Profile run against the regular keyword field "host.name", as a baseline for comparison. | |
# NOTE: the aggregation is still named "event_module" here even though it buckets on "host.name". | |
GET const-logs-frank-delme-1/_search?request_cache=false&human=true | |
{ | |
"size": 0, | |
"profile": true, | |
"track_total_hits": false, | |
"aggs": { | |
"event_module": { | |
"terms": { | |
"field": "host.name" | |
} | |
} | |
} | |
} | |
# Profile run against the "constant_keyword" field "data_stream.dataset" directly (no script involved). | |
GET const-logs-frank-delme-1/_search?request_cache=false&human=true | |
{ | |
"size": 0, | |
"profile": true, | |
"track_total_hits": false, | |
"aggs": { | |
"event_module": { | |
"terms": { | |
"field": "data_stream.dataset" | |
} | |
} | |
} | |
} | |
# Profile run against "event_alias_test.dataset", a field alias that points to the "constant_keyword" field. | |
GET const-logs-frank-delme-1/_search?request_cache=false&human=true | |
{ | |
"size": 0, | |
"profile": true, | |
"track_total_hits": false, | |
"aggs": { | |
"event_alias_test": { | |
"terms": { | |
"field": "event_alias_test.dataset" | |
} | |
} | |
} | |
} |
This is still faster, and it is not using the constant_keyword but rather just a regular keyword for host.name:
# Profile run against the host.name keyword field
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
"size": 0,
"profile": true,
"track_total_hits": false,
"aggs": {
"event_module": {
"terms": {
"field": "host.name"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"event_module" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "<redacted>", <-- redacted
"doc_count" : 758221
},
{
"key" : "<redacted>", <-- redacted
"doc_count" : 178220
},
{
"key" : "<redacted>", <-- redacted
"doc_count" : 5196
}
]
}
},
"profile" : {
"shards" : [
{
"id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time" : "443nanos",
"time_in_nanos" : 443,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 0,
"build_scorer_count" : 0,
"create_weight" : 443,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 0
}
}
],
"rewrite_time" : 1836,
"collector" : [
{
"name" : "MultiCollector",
"reason" : "search_multi",
"time" : "220.6micros",
"time_in_nanos" : 220630,
"children" : [
{
"name" : "EarlyTerminatingCollector",
"reason" : "search_count",
"time" : "58.3micros",
"time_in_nanos" : 58399
},
{
"name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@117ae383]",
"reason" : "aggregation",
"time" : "128micros",
"time_in_nanos" : 128050
}
]
}
]
}
],
"aggregations" : [
{
"type" : "StringTermsAggregatorFromFilters",
"description" : "event_module",
"time" : "95.8micros",
"time_in_nanos" : 95802,
"breakdown" : {
"reduce" : 0,
"post_collection_count" : 1,
"build_leaf_collector" : 88649,
"build_aggregation" : 5577,
"build_aggregation_count" : 1,
"build_leaf_collector_count" : 12,
"post_collection" : 431,
"initialize" : 1145,
"initialize_count" : 1,
"reduce_count" : 0,
"collect" : 0,
"collect_count" : 0
},
"debug" : {
"delegate" : "FiltersAggregator.FilterByFilter",
"delegate_debug" : {
"segments_with_doc_count_field" : 0,
"segments_with_deleted_docs" : 0,
"filters" : [
{
"results_from_metadata" : 12,
"query" : "host.name:akroh-windows",
"scorers_prepared_while_estimating_cost" : 0,
"specialized_for" : "term"
},
{
"results_from_metadata" : 12,
"query" : "host.name:bastion00",
"scorers_prepared_while_estimating_cost" : 0,
"specialized_for" : "term"
},
{
"results_from_metadata" : 12,
"query" : "host.name:kibana00",
"scorers_prepared_while_estimating_cost" : 0,
"specialized_for" : "term"
}
],
"segments_counted" : 12,
"segments_collected" : 0
}
}
}
]
}
]
}
}
Aggregating directly against the constant_keyword is fast:
# Profile run against the constant_keyword field directly
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
"size": 0,
"profile": true,
"track_total_hits": false,
"aggs": {
"event_module": {
"terms": {
"field": "data_stream.dataset"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"event_module" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "nginx.access",
"doc_count" : 941637
}
]
}
},
"profile" : {
"shards" : [
{
"id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time" : "430nanos",
"time_in_nanos" : 430,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 0,
"build_scorer_count" : 0,
"create_weight" : 430,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 0
}
}
],
"rewrite_time" : 1470,
"collector" : [
{
"name" : "MultiCollector",
"reason" : "search_multi",
"time" : "144.6micros",
"time_in_nanos" : 144619,
"children" : [
{
"name" : "EarlyTerminatingCollector",
"reason" : "search_count",
"time" : "60.8micros",
"time_in_nanos" : 60804
},
{
"name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@44cea343]",
"reason" : "aggregation",
"time" : "48.6micros",
"time_in_nanos" : 48670
}
]
}
]
}
],
"aggregations" : [
{
"type" : "StringTermsAggregatorFromFilters",
"description" : "event_module",
"time" : "17.4micros",
"time_in_nanos" : 17442,
"breakdown" : {
"reduce" : 0,
"post_collection_count" : 1,
"build_leaf_collector" : 10512,
"build_aggregation" : 4870,
"build_aggregation_count" : 1,
"build_leaf_collector_count" : 12,
"post_collection" : 446,
"initialize" : 1614,
"initialize_count" : 1,
"reduce_count" : 0,
"collect" : 0,
"collect_count" : 0
},
"debug" : {
"delegate" : "FiltersAggregator.FilterByFilter",
"delegate_debug" : {
"segments_with_doc_count_field" : 0,
"segments_with_deleted_docs" : 0,
"filters" : [
{
"results_from_metadata" : 12,
"query" : "*:*",
"scorers_prepared_while_estimating_cost" : 0,
"specialized_for" : "match_all"
}
],
"segments_counted" : 12,
"segments_collected" : 0
}
}
}
]
}
]
}
}
Using an alias against the constant_keyword is still fast:
# Profile against a field alias against the "constant_keyword" field
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
"size": 0,
"profile": true,
"track_total_hits": false,
"aggs": {
"event_alias_test": {
"terms": {
"field": "event_alias_test.dataset"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"event_alias_test" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "nginx.access",
"doc_count" : 941637
}
]
}
},
"profile" : {
"shards" : [
{
"id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time" : "431nanos",
"time_in_nanos" : 431,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 0,
"advance_count" : 0,
"score" : 0,
"build_scorer_count" : 0,
"create_weight" : 431,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 0
}
}
],
"rewrite_time" : 1586,
"collector" : [
{
"name" : "MultiCollector",
"reason" : "search_multi",
"time" : "139.1micros",
"time_in_nanos" : 139110,
"children" : [
{
"name" : "EarlyTerminatingCollector",
"reason" : "search_count",
"time" : "56.9micros",
"time_in_nanos" : 56946
},
{
"name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@7db930b]",
"reason" : "aggregation",
"time" : "47.3micros",
"time_in_nanos" : 47378
}
]
}
]
}
],
"aggregations" : [
{
"type" : "StringTermsAggregatorFromFilters",
"description" : "event_alias_test",
"time" : "16.3micros",
"time_in_nanos" : 16393,
"breakdown" : {
"reduce" : 0,
"post_collection_count" : 1,
"build_leaf_collector" : 9997,
"build_aggregation" : 4381,
"build_aggregation_count" : 1,
"build_leaf_collector_count" : 12,
"post_collection" : 818,
"initialize" : 1197,
"initialize_count" : 1,
"reduce_count" : 0,
"collect" : 0,
"collect_count" : 0
},
"debug" : {
"delegate" : "FiltersAggregator.FilterByFilter",
"delegate_debug" : {
"segments_with_doc_count_field" : 0,
"segments_with_deleted_docs" : 0,
"filters" : [
{
"results_from_metadata" : 12,
"query" : "*:*",
"scorers_prepared_while_estimating_cost" : 0,
"specialized_for" : "match_all"
}
],
"segments_counted" : 12,
"segments_collected" : 0
}
}
}
]
}
]
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Runtime fields used for backwards compatibility are very slow when they have to split the constant_keyword value. It looks like there is no optimization for runtime scripts that read a constant_keyword.