Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrankHassanabad/e15dc17b7b63982eabd185bd8be903d9 to your computer and use it in GitHub Desktop.
Save FrankHassanabad/e15dc17b7b63982eabd185bd8be903d9 to your computer and use it in GitHub Desktop.
Slow queries using runtime fields to split against "data_stream.dataset" to create "event.module"
# Create an index with a runtime field that splits the "constant_keyword" field
# "data_stream.dataset" on "." and emits the first token as "event.module".
# A regular "keyword" field, "host.name", is also mapped as a baseline for comparison.
# * Profile of aggregating against the runtime field which does a split against the "constant_keyword"
# * Profile of aggregating against the "constant_keyword" field directly
# * Profile of aggregating against a normal keyword
# * Profile of aggregating against a field alias that points at the "constant_keyword" field
# Start clean: drop any index left over from a previous run.
DELETE const-logs-frank-delme-1
# Recreate the test index. Its mapping declares:
# * a runtime "event.module" keyword whose script splits "data_stream.dataset" on "." and emits the first token
# * "host.name" as a regular keyword
# * "data_stream.dataset" as a "constant_keyword" with the value "nginx.access"
# * "event_alias_test.dataset" as a field alias pointing at "data_stream.dataset"
# The script lines are left unindented so the script source string is unchanged.
PUT const-logs-frank-delme-1
{
  "mappings": {
    "dynamic": "false",
    "runtime": {
      "event.module": {
        "type": "keyword",
        "script": {
          "source": """
def split_module=doc['data_stream.dataset'].value.splitOnToken('.');
emit(split_module[0])
"""
        }
      }
    },
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "message": {
        "type": "text"
      },
      "host": {
        "properties": {
          "name": {
            "type": "keyword"
          }
        }
      },
      "data_stream": {
        "properties": {
          "dataset": {
            "type": "constant_keyword",
            "value": "nginx.access"
          }
        }
      },
      "event_alias_test": {
        "properties": {
          "dataset": {
            "type": "alias",
            "path": "data_stream.dataset"
          }
        }
      }
    }
  }
}
# Copy two auditbeat indexes from estc into the test index so the profile runs
# work on close to 1 million documents.
# wait_for_completion=false runs the reindex as a background task instead of blocking the request.
POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "auditbeat-8.0.0-2021.04.26-000007,auditbeat-8.0.0-2021.05.26-000008"
  },
  "dest": {
    "index": "const-logs-frank-delme-1"
  }
}
# The count should be around 941,637 once the reindex has completed.
GET const-logs-frank-delme-1/_count
# Profile a terms aggregation on the runtime field "event.module".
# No hits are returned ("size": 0) and the request cache is bypassed.
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true,
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "event.module"
      }
    }
  }
}
# Profile a terms aggregation on the regular keyword field "host.name".
# The aggregation keeps the name "event_module" even though it buckets on "host.name".
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true,
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "host.name"
      }
    }
  }
}
# Profile a terms aggregation directly on the "constant_keyword" field "data_stream.dataset".
# The aggregation keeps the name "event_module" even though it buckets on "data_stream.dataset".
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true,
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "data_stream.dataset"
      }
    }
  }
}
# Profile a terms aggregation on "event_alias_test.dataset", the field alias
# that points at the "constant_keyword" field "data_stream.dataset".
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true,
  "track_total_hits": false,
  "aggs": {
    "event_alias_test": {
      "terms": {
        "field": "event_alias_test.dataset"
      }
    }
  }
}
@FrankHassanabad
Copy link
Author

FrankHassanabad commented May 28, 2021

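Runtime fields used for backwards compatibility are very slow when they have to split the constant_keyword value. It looks like there is no optimization for runtime scripts that read a constant_keyword: the terms aggregation below is executed by MapStringTermsAggregator, collects all 941,637 documents one by one, and the request takes 464 ms.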

# Profile run against the runtime field
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true, 
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "event.module"
      }
    }
  }
}
{
  "took" : 464,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "event_module" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "nginx",
          "doc_count" : 941637
        }
      ]
    }
  },
  "profile" : {
    "shards" : [
      {
        "id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
        "searches" : [
          {
            "query" : [
              {
                "type" : "MatchAllDocsQuery",
                "description" : "*:*",
                "time" : "30.8ms",
                "time_in_nanos" : 30809553,
                "breakdown" : {
                  "set_min_competitive_score_count" : 0,
                  "match_count" : 0,
                  "shallow_advance_count" : 0,
                  "set_min_competitive_score" : 0,
                  "next_doc" : 30743354,
                  "match" : 0,
                  "next_doc_count" : 941637,
                  "score_count" : 0,
                  "compute_max_score_count" : 0,
                  "compute_max_score" : 0,
                  "advance" : 2538,
                  "advance_count" : 12,
                  "score" : 0,
                  "build_scorer_count" : 24,
                  "create_weight" : 395,
                  "shallow_advance" : 0,
                  "create_weight_count" : 1,
                  "build_scorer" : 63266
                }
              }
            ],
            "rewrite_time" : 1620,
            "collector" : [
              {
                "name" : "MultiCollector",
                "reason" : "search_multi",
                "time" : "420.3ms",
                "time_in_nanos" : 420361224,
                "children" : [
                  {
                    "name" : "EarlyTerminatingCollector",
                    "reason" : "search_count",
                    "time" : "372.1micros",
                    "time_in_nanos" : 372156
                  },
                  {
                    "name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@56de9cbe]",
                    "reason" : "aggregation",
                    "time" : "359ms",
                    "time_in_nanos" : 359026659
                  }
                ]
              }
            ]
          }
        ],
        "aggregations" : [
          {
            "type" : "MapStringTermsAggregator",
            "description" : "event_module",
            "time" : "376.2ms",
            "time_in_nanos" : 376203445,
            "breakdown" : {
              "reduce" : 0,
              "post_collection_count" : 1,
              "build_leaf_collector" : 103523,
              "build_aggregation" : 6939,
              "build_aggregation_count" : 1,
              "build_leaf_collector_count" : 12,
              "post_collection" : 1632,
              "initialize" : 2636,
              "initialize_count" : 1,
              "reduce_count" : 0,
              "collect" : 376088715,
              "collect_count" : 941637
            },
            "debug" : {
              "total_buckets" : 1,
              "result_strategy" : "terms"
            }
          }
        ]
      }
    ]
  }
}

@FrankHassanabad
Copy link
Author

Aggregating on host.name, which is a regular keyword rather than a constant_keyword, is still far faster than the runtime field (took 1 ms):

# Profile run against the host.name keyword field
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true, 
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "host.name"
      }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "event_module" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "<redacted>", <-- redacted
          "doc_count" : 758221
        },
        {
          "key" : "<redacted>",  <-- redacted
          "doc_count" : 178220
        },
        {
          "key" : "<redacted>",  <-- redacted
          "doc_count" : 5196
        }
      ]
    }
  },
  "profile" : {
    "shards" : [
      {
        "id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
        "searches" : [
          {
            "query" : [
              {
                "type" : "MatchAllDocsQuery",
                "description" : "*:*",
                "time" : "443nanos",
                "time_in_nanos" : 443,
                "breakdown" : {
                  "set_min_competitive_score_count" : 0,
                  "match_count" : 0,
                  "shallow_advance_count" : 0,
                  "set_min_competitive_score" : 0,
                  "next_doc" : 0,
                  "match" : 0,
                  "next_doc_count" : 0,
                  "score_count" : 0,
                  "compute_max_score_count" : 0,
                  "compute_max_score" : 0,
                  "advance" : 0,
                  "advance_count" : 0,
                  "score" : 0,
                  "build_scorer_count" : 0,
                  "create_weight" : 443,
                  "shallow_advance" : 0,
                  "create_weight_count" : 1,
                  "build_scorer" : 0
                }
              }
            ],
            "rewrite_time" : 1836,
            "collector" : [
              {
                "name" : "MultiCollector",
                "reason" : "search_multi",
                "time" : "220.6micros",
                "time_in_nanos" : 220630,
                "children" : [
                  {
                    "name" : "EarlyTerminatingCollector",
                    "reason" : "search_count",
                    "time" : "58.3micros",
                    "time_in_nanos" : 58399
                  },
                  {
                    "name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@117ae383]",
                    "reason" : "aggregation",
                    "time" : "128micros",
                    "time_in_nanos" : 128050
                  }
                ]
              }
            ]
          }
        ],
        "aggregations" : [
          {
            "type" : "StringTermsAggregatorFromFilters",
            "description" : "event_module",
            "time" : "95.8micros",
            "time_in_nanos" : 95802,
            "breakdown" : {
              "reduce" : 0,
              "post_collection_count" : 1,
              "build_leaf_collector" : 88649,
              "build_aggregation" : 5577,
              "build_aggregation_count" : 1,
              "build_leaf_collector_count" : 12,
              "post_collection" : 431,
              "initialize" : 1145,
              "initialize_count" : 1,
              "reduce_count" : 0,
              "collect" : 0,
              "collect_count" : 0
            },
            "debug" : {
              "delegate" : "FiltersAggregator.FilterByFilter",
              "delegate_debug" : {
                "segments_with_doc_count_field" : 0,
                "segments_with_deleted_docs" : 0,
                "filters" : [
                  {
                    "results_from_metadata" : 12,
                    "query" : "host.name:<redacted>",
                    "scorers_prepared_while_estimating_cost" : 0,
                    "specialized_for" : "term"
                  },
                  {
                    "results_from_metadata" : 12,
                    "query" : "host.name:<redacted>",
                    "scorers_prepared_while_estimating_cost" : 0,
                    "specialized_for" : "term"
                  },
                  {
                    "results_from_metadata" : 12,
                    "query" : "host.name:<redacted>",
                    "scorers_prepared_while_estimating_cost" : 0,
                    "specialized_for" : "term"
                  }
                ],
                "segments_counted" : 12,
                "segments_collected" : 0
              }
            }
          }
        ]
      }
    ]
  }
}

@FrankHassanabad
Copy link
Author

Aggregating directly against the constant_keyword field is fast (took 1 ms):

# Profile run against the constant_keyword field directly
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true, 
  "track_total_hits": false,
  "aggs": {
    "event_module": {
      "terms": {
        "field": "data_stream.dataset"
      }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "event_module" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "nginx.access",
          "doc_count" : 941637
        }
      ]
    }
  },
  "profile" : {
    "shards" : [
      {
        "id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
        "searches" : [
          {
            "query" : [
              {
                "type" : "MatchAllDocsQuery",
                "description" : "*:*",
                "time" : "430nanos",
                "time_in_nanos" : 430,
                "breakdown" : {
                  "set_min_competitive_score_count" : 0,
                  "match_count" : 0,
                  "shallow_advance_count" : 0,
                  "set_min_competitive_score" : 0,
                  "next_doc" : 0,
                  "match" : 0,
                  "next_doc_count" : 0,
                  "score_count" : 0,
                  "compute_max_score_count" : 0,
                  "compute_max_score" : 0,
                  "advance" : 0,
                  "advance_count" : 0,
                  "score" : 0,
                  "build_scorer_count" : 0,
                  "create_weight" : 430,
                  "shallow_advance" : 0,
                  "create_weight_count" : 1,
                  "build_scorer" : 0
                }
              }
            ],
            "rewrite_time" : 1470,
            "collector" : [
              {
                "name" : "MultiCollector",
                "reason" : "search_multi",
                "time" : "144.6micros",
                "time_in_nanos" : 144619,
                "children" : [
                  {
                    "name" : "EarlyTerminatingCollector",
                    "reason" : "search_count",
                    "time" : "60.8micros",
                    "time_in_nanos" : 60804
                  },
                  {
                    "name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@44cea343]",
                    "reason" : "aggregation",
                    "time" : "48.6micros",
                    "time_in_nanos" : 48670
                  }
                ]
              }
            ]
          }
        ],
        "aggregations" : [
          {
            "type" : "StringTermsAggregatorFromFilters",
            "description" : "event_module",
            "time" : "17.4micros",
            "time_in_nanos" : 17442,
            "breakdown" : {
              "reduce" : 0,
              "post_collection_count" : 1,
              "build_leaf_collector" : 10512,
              "build_aggregation" : 4870,
              "build_aggregation_count" : 1,
              "build_leaf_collector_count" : 12,
              "post_collection" : 446,
              "initialize" : 1614,
              "initialize_count" : 1,
              "reduce_count" : 0,
              "collect" : 0,
              "collect_count" : 0
            },
            "debug" : {
              "delegate" : "FiltersAggregator.FilterByFilter",
              "delegate_debug" : {
                "segments_with_doc_count_field" : 0,
                "segments_with_deleted_docs" : 0,
                "filters" : [
                  {
                    "results_from_metadata" : 12,
                    "query" : "*:*",
                    "scorers_prepared_while_estimating_cost" : 0,
                    "specialized_for" : "match_all"
                  }
                ],
                "segments_counted" : 12,
                "segments_collected" : 0
              }
            }
          }
        ]
      }
    ]
  }
}

@FrankHassanabad
Copy link
Author

Aggregating through a field alias that points at the constant_keyword field is still fast (took 1 ms):

# Profile against a field alias against the "constant_keyword" field
GET const-logs-frank-delme-1/_search?request_cache=false&human=true
{
  "size": 0,
  "profile": true,
  "track_total_hits": false,
  "aggs": {
    "event_alias_test": {
      "terms": {
        "field": "event_alias_test.dataset"
      }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "event_alias_test" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "nginx.access",
          "doc_count" : 941637
        }
      ]
    }
  },
  "profile" : {
    "shards" : [
      {
        "id" : "[6zKvMLm3RQOlBuzH_74A1g][const-logs-frank-delme-1][0]",
        "searches" : [
          {
            "query" : [
              {
                "type" : "MatchAllDocsQuery",
                "description" : "*:*",
                "time" : "431nanos",
                "time_in_nanos" : 431,
                "breakdown" : {
                  "set_min_competitive_score_count" : 0,
                  "match_count" : 0,
                  "shallow_advance_count" : 0,
                  "set_min_competitive_score" : 0,
                  "next_doc" : 0,
                  "match" : 0,
                  "next_doc_count" : 0,
                  "score_count" : 0,
                  "compute_max_score_count" : 0,
                  "compute_max_score" : 0,
                  "advance" : 0,
                  "advance_count" : 0,
                  "score" : 0,
                  "build_scorer_count" : 0,
                  "create_weight" : 431,
                  "shallow_advance" : 0,
                  "create_weight_count" : 1,
                  "build_scorer" : 0
                }
              }
            ],
            "rewrite_time" : 1586,
            "collector" : [
              {
                "name" : "MultiCollector",
                "reason" : "search_multi",
                "time" : "139.1micros",
                "time_in_nanos" : 139110,
                "children" : [
                  {
                    "name" : "EarlyTerminatingCollector",
                    "reason" : "search_count",
                    "time" : "56.9micros",
                    "time_in_nanos" : 56946
                  },
                  {
                    "name" : "MultiBucketCollector: [org.elasticsearch.search.aggregations.MultiBucketCollector$1@7db930b]",
                    "reason" : "aggregation",
                    "time" : "47.3micros",
                    "time_in_nanos" : 47378
                  }
                ]
              }
            ]
          }
        ],
        "aggregations" : [
          {
            "type" : "StringTermsAggregatorFromFilters",
            "description" : "event_alias_test",
            "time" : "16.3micros",
            "time_in_nanos" : 16393,
            "breakdown" : {
              "reduce" : 0,
              "post_collection_count" : 1,
              "build_leaf_collector" : 9997,
              "build_aggregation" : 4381,
              "build_aggregation_count" : 1,
              "build_leaf_collector_count" : 12,
              "post_collection" : 818,
              "initialize" : 1197,
              "initialize_count" : 1,
              "reduce_count" : 0,
              "collect" : 0,
              "collect_count" : 0
            },
            "debug" : {
              "delegate" : "FiltersAggregator.FilterByFilter",
              "delegate_debug" : {
                "segments_with_doc_count_field" : 0,
                "segments_with_deleted_docs" : 0,
                "filters" : [
                  {
                    "results_from_metadata" : 12,
                    "query" : "*:*",
                    "scorers_prepared_while_estimating_cost" : 0,
                    "specialized_for" : "match_all"
                  }
                ],
                "segments_counted" : 12,
                "segments_collected" : 0
              }
            }
          }
        ]
      }
    ]
  }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment