Skip to content

Instantly share code, notes, and snippets.

@esoergel
Created May 1, 2024 18:38
Show Gist options
  • Save esoergel/8f6ed01396e4b09cd920f3f0e8216641 to your computer and use it in GitHub Desktop.
Save esoergel/8f6ed01396e4b09cd920f3f0e8216641 to your computer and use it in GitHub Desktop.
Script for interpreting elasticsearch profile JSON
#!/usr/bin/env python
"""
Script for interpreting elasticsearch profile JSON. It traverses the profile
data per shard, extracting all leaf nodes that took more than 1ms to process.
These are then (roughly) joined across shards and aggregated. The hope is that
this will clearly highlight the worst offenders in the query.
Usage:
$ ./interpret_es_profile.py es_profile_3_main_2024-05-01T16.15.57.json
Overall time per shard
Avg: 10.2687 - Min: 0.0203 - Med: 10.0286 - Max: 28.2535 - StDev: 9.1360 - count 96
Leaf nodes over 1ms
9.7365 s: TermInSetQuery - _uid:case#********************* _uid:case#************************ [...]
Avg: 9.7365 - Min: 0.0064 - Med: 9.4433 - Max: 27.4775 - StDev: 8.8363 - count 96
0.2373 s: MultiTermQueryConstantScoreWrapper - case_properties.value:mylastname~1
Avg: 0.2373 - Min: 0.0014 - Med: 0.2454 - Max: 0.5847 - StDev: 0.1786 - count 93
0.2225 s: TermQuery - case_properties.value.phonetic:X1011
Avg: 0.2225 - Min: 0.0014 - Med: 0.0572 - Max: 4.2817 - StDev: 0.5291 - count 94
0.0595 s: MultiTermQueryConstantScoreWrapper - case_properties.value:myfirstname~1
Avg: 0.0595 - Min: 0.0010 - Med: 0.0533 - Max: 0.1843 - StDev: 0.0494 - count 84
"""
import argparse
import json
from dataclasses import dataclass, field
from textwrap import shorten
from typing import List
import numpy
@dataclass()
class TimeArray:
    """Collects per-shard evaluation times, in nanoseconds.

    Provides the mean and a one-line statistical summary; every displayed
    value is converted from nanoseconds to seconds for readability.
    """
    # One raw sample per shard, in nanoseconds.
    times: List[int] = field(default_factory=list)

    def append(self, time):
        """Record one additional sample (nanoseconds)."""
        self.times.append(time)

    @property
    def average(self):
        """Arithmetic mean of all recorded samples, in nanoseconds."""
        return sum(self.times) / len(self.times)

    def __str__(self):
        samples = self.times
        stats = [
            ("Avg", self.average),
            ("Min", min(samples)),
            ("Med", numpy.median(samples)),
            ("Max", max(samples)),
            ("StDev", numpy.std(samples)),
        ]
        rendered = [f"{name}: {val / 10**9:>7.4f}" for name, val in stats]
        return " - ".join(rendered) + f" - count {len(samples)}"

    def __repr__(self):
        return f"TimeArray(avg={self.average / 10**9})"
def parse_profile(profile_json):
    """Extract the root query-profile node for every shard.

    Takes the parsed ES profile JSON and returns a list with one entry per
    shard: the first query node of the shard's first search.
    """
    roots = []
    for shard in profile_json['shards']:
        roots.append(shard['searches'][0]['query'][0])
    return roots
def show_speed_per_shard(shards):
    """Print summary statistics of each shard's total query time."""
    overall = TimeArray([s['time_in_nanos'] for s in shards])
    print("\nOverall time per shard")
    print(f" {overall}")
def find_leaf_nodes(shard, threshold=None):
    """Yield leaf query nodes in this profile subtree slower than *threshold*.

    Args:
        shard: a profile node dict with 'time_in_nanos' and, for inner
            nodes, a 'children' list of further profile nodes.
        threshold: minimum 'time_in_nanos' for a subtree to be explored.
            Defaults to 1ms (10**6 ns), matching the previous hard-coded
            cutoff.

    Yields:
        Leaf node dicts (those without 'children') above the threshold.
    """
    # Bug fix: the threshold parameter was previously accepted but ignored —
    # the 1ms cutoff was hard-coded below. Defaulting here keeps the old
    # behavior for existing callers.
    if threshold is None:
        threshold = 10**6
    if shard['time_in_nanos'] > threshold:
        if 'children' in shard:
            for child in shard['children']:
                yield from find_leaf_nodes(child, threshold)
        else:
            # No children: it's a leaf node.
            yield shard
def worst_leaf_nodes(shards):
    """Group slow leaf nodes across all shards.

    Returns a dict mapping (query type, description) to a TimeArray of the
    matching leaf nodes' times, one sample per shard that had the node.
    """
    grouped = {}
    for shard in shards:
        for node in find_leaf_nodes(shard):
            key = (node['type'], node['description'])
            grouped.setdefault(key, TimeArray()).append(node['time_in_nanos'])
    return grouped
def show_worst_leaf_nodes(shards):
    """Print slow leaf queries, slowest (by average time) first."""
    entries = list(worst_leaf_nodes(shards).items())
    entries.sort(key=lambda item: item[1].average, reverse=True)
    print("\nLeaf nodes over 1ms")
    for (qtype, description), stats in entries:
        # Truncate very long query descriptions for display.
        summary = shorten(description, 150)
        print(f"{stats.average / 10**9:<7.4f}s: {qtype} - {summary}")
        print(f" {stats}")
def main():
    """Parse the CLI argument, load the profile JSON, and print reports."""
    parser = argparse.ArgumentParser(description='Process ES profile output.')
    parser.add_argument("profile_json", type=argparse.FileType("r"))
    options = parser.parse_args()
    profile = json.load(options.profile_json)
    shards = parse_profile(profile)
    show_speed_per_shard(shards)
    show_worst_leaf_nodes(shards)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment