Skip to content

Instantly share code, notes, and snippets.

@elsid
Last active June 20, 2020 00:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save elsid/995c1c6db11d756a292c23726672c91b to your computer and use it in GitHub Desktop.
Save elsid/995c1c6db11d756a292c23726672c91b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import click
import collections
import matplotlib.pyplot
import numpy
import statistics
import sys
# NOTE: documented with comments rather than a docstring on purpose -- click
# would use a docstring as the command's --help text.
#
# Reads every frame from `path` (stdin when empty), arranges the values per
# key, then runs each action requested on the command line and finally hands
# control to matplotlib so any created figures are shown.
@click.command()
@click.option('--print_keys', is_flag=True)
@click.option('--timeseries', type=str, multiple=True)
@click.option('--hist', type=str, multiple=True)
@click.option('--plot', nargs=3, type=str, multiple=True)
@click.option('--stats', type=str, multiple=True)
@click.argument('path', default='', type=click.Path())
def main(print_keys, timeseries, hist, plot, stats, path):
    records = list(read_data(path))
    known_keys = collect_unique_keys(records)
    frames = collect_per_frame(data=records, keys=known_keys)

    if print_keys:
        for name in known_keys:
            print(name)

    # (selection from the command line, handler, handler's keyword name),
    # listed in the order the actions must run.
    requested = (
        (timeseries, draw_timeseries, 'keys'),
        (hist, draw_hists, 'keys'),
        (plot, draw_plots, 'plots'),
        (stats, print_stats, 'keys'),
    )
    for selection, action, parameter in requested:
        if selection:
            action(frames=frames, **{parameter: selection})

    matplotlib.pyplot.show()
def read_data(path):
    """Parse a stats log into one dict per frame.

    A line starting with ``Stats Viewer`` opens a new frame: it is split on
    single spaces into exactly four fields, and the last two become the
    frame's first key and its integer value.  A line starting with a space
    adds a ``key value`` pair to the current frame; the value is the last
    whitespace-separated token and is converted with ``to_number``.  All
    other lines are ignored.

    Args:
        path: File to read.  A falsy value (such as ``''``) reads
            ``sys.stdin`` instead; note that the ``with`` block closes the
            stream it used, stdin included.

    Yields:
        dict: Mapping of key to number for every non-empty frame, in input
        order, including the final frame of the input.

    Raises:
        ValueError: If a header line does not split into four fields, a
            header value is not an integer, or an indented line does not
            hold both a key and a numeric value.
    """
    with open(path) if path else sys.stdin as stream:
        frame = {}
        for line in stream:
            if line.startswith('Stats Viewer'):
                if frame:
                    yield frame
                _, _, key, value = line.split(' ')
                frame = {key: int(value)}
            elif line.startswith(' '):
                key, value = line.strip().rsplit(maxsplit=1)
                frame[key] = to_number(value)
        # Frames are only emitted when the next header shows up, so the last
        # one must be flushed here or it would be silently dropped.
        if frame:
            yield frame
def collect_per_frame(data, keys):
    """Turn a sequence of frame dicts into one numpy array per key.

    Args:
        data: Iterable of dicts, one per frame.
        keys: Keys to extract from every frame.

    Returns:
        collections.defaultdict: Maps each key to a ``numpy`` array holding
        that key's value for every frame, with ``0`` standing in wherever a
        frame lacks the key.  Empty when ``data`` or ``keys`` is empty.
    """
    columns = collections.defaultdict(list)
    for record in data:
        for name in keys:
            columns[name].append(record.get(name, 0))
    # Only existing keys are reassigned, so iterating while assigning is safe.
    for name in columns:
        columns[name] = numpy.array(columns[name])
    return columns
def collect_unique_keys(frames):
    """Return every key that occurs in any frame, sorted and de-duplicated.

    Args:
        frames: Iterable of dicts.

    Returns:
        list: Sorted list of the distinct keys found across all frames.
    """
    return sorted({name for record in frames for name in record})
def draw_timeseries(frames, keys):
    """Plot each selected series against its frame index in a single figure.

    Args:
        frames: Mapping of key to a sequence of per-frame values.
        keys: Keys to plot; each one becomes a labelled line.

    Raises:
        ValueError: If none of ``keys`` is present in ``frames`` (``max`` is
            then called on an empty sequence).
    """
    fig, ax = matplotlib.pyplot.subplots()
    # The x axis is the frame index, sized to the longest selected series.
    x = numpy.arange(max(len(v) for k, v in frames.items() if k in keys))
    for key in keys:
        ax.plot(x, frames[key], label=key)
    ax.grid(True)
    ax.legend()
    # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
    # removed in 3.6; the figure manager exposes the supported equivalent.
    fig.canvas.manager.set_window_title('timeseries')
def draw_hists(frames, keys):
    """Draw overlaid histograms of the selected series on shared bins.

    The 20 bin edges span the overall minimum and maximum of all selected
    series, so the histograms are directly comparable.

    Args:
        frames: Mapping of key to a sequence of per-frame values.
        keys: Keys to draw; each one becomes a labelled histogram.

    Raises:
        ValueError: If none of ``keys`` is present in ``frames`` (``min`` is
            then called on an empty sequence).
    """
    fig, ax = matplotlib.pyplot.subplots()
    bins = numpy.linspace(
        start=min(min(v) for k, v in frames.items() if k in keys),
        stop=max(max(v) for k, v in frames.items() if k in keys),
        num=20,
    )
    # Split the opacity between the histograms so overlapping ones stay visible.
    alpha = 1 / len(keys)
    for key in keys:
        ax.hist(frames[key], bins=bins, label=key, alpha=alpha)
    ax.set_xticks(bins)
    ax.grid(True)
    ax.legend()
    # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
    # removed in 3.6; the figure manager exposes the supported equivalent.
    fig.canvas.manager.set_window_title('hists')
def draw_plots(frames, plots):
    """Plot one series against another, optionally aggregated per x value.

    Args:
        frames: Mapping of key to a sequence of per-frame values.
        plots: Iterable of ``(x_key, y_key, agg)`` triples.  With ``agg`` set
            to ``None`` the raw points are plotted in frame order.  With
            ``'mean'`` or ``'median'`` the y values are grouped by their x
            value, reduced with that function and plotted in ascending x
            order.  Any other falsy ``agg`` (such as ``''``) draws nothing
            for that triple.

    Raises:
        KeyError: If ``agg`` is a non-empty string other than ``'mean'`` or
            ``'median'``.
    """
    aggregators = dict(
        mean=statistics.mean,
        median=statistics.median,
    )
    fig, ax = matplotlib.pyplot.subplots()
    for x_key, y_key, agg in plots:
        if agg is None:
            ax.plot(frames[x_key], frames[y_key], label=f'x={x_key}, y={y_key}')
        elif agg:
            agg_f = aggregators[agg]
            grouped = collections.defaultdict(list)
            for x, y in zip(frames[x_key], frames[y_key]):
                grouped[x].append(y)
            aggregated = sorted((k, agg_f(v)) for k, v in grouped.items())
            ax.plot(
                numpy.array([v[0] for v in aggregated]),
                numpy.array([v[1] for v in aggregated]),
                label=f'x={x_key}, y={y_key}, agg={agg}',
            )
    ax.grid(True)
    ax.legend()
    # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
    # removed in 3.6; the figure manager exposes the supported equivalent.
    fig.canvas.manager.set_window_title('plots')
def print_stats(frames, keys):
    """Print a table of summary statistics, one row per requested key.

    The header row holds the metric names produced by ``make_stats``; each
    following row holds the values for one key, formatted by ``row``.

    Args:
        frames: Mapping of key to a sequence of per-frame values.
        keys: Keys to summarise.  Nothing is printed when this is empty.
    """
    stats = [make_stats(key=key, values=frames[key]) for key in keys]
    # Without any rows there is no header to derive either; indexing
    # stats[0] would raise IndexError.
    if not stats:
        return
    row(*stats[0].keys())
    for key_stats in stats:
        row(*key_stats.values())
def make_stats(key, values):
    """Compute summary statistics for one series.

    Args:
        key: Name of the series; stored under ``'key'`` in the result.
        values: Non-empty sequence of numbers.

    Returns:
        collections.OrderedDict: Entries ``key``, ``number``, ``min``,
        ``max``, ``mean``, ``median``, ``stdev`` and ``q95`` (the 0.95
        quantile), in that order.  ``stdev`` is NaN when fewer than two
        values are given, because the sample standard deviation is undefined
        there.

    Raises:
        ValueError: If ``values`` is empty (raised by ``min``).
    """
    count = len(values)
    # statistics.stdev raises StatisticsError below two data points; report
    # NaN instead so a single-frame log can still be summarised.
    if count > 1:
        deviation = statistics.stdev(values)
    else:
        deviation = float('nan')
    return collections.OrderedDict(
        key=key,
        number=count,
        min=min(values),
        max=max(values),
        mean=statistics.mean(values),
        median=statistics.median(values),
        stdev=deviation,
        q95=numpy.quantile(values, 0.95),
    )
def row(*args):
    """Print the arguments on one line, each right-aligned in 25 columns."""
    cells = [f'{value:>25}' for value in args]
    print(''.join(cells))
def to_number(value):
    """Convert ``value`` to an ``int`` if possible, otherwise to a ``float``.

    Raises:
        ValueError: If ``value`` is neither a valid integer nor a valid float.
    """
    try:
        parsed = int(value)
    except ValueError:
        # Not an integer literal; let float() accept it or raise.
        parsed = float(value)
    return parsed
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment