Skip to content

Instantly share code, notes, and snippets.

@filimonov
Forked from akuzm/stats.sh
Created July 1, 2020 06:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save filimonov/b2fa4a1640acc6acbe922d31d09cae51 to your computer and use it in GitHub Desktop.
Save filimonov/b2fa4a1640acc6acbe922d31d09cae51 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Summarizes report/query-metric-stats.tsv with clickhouse-local and draws
# per-metric histogram and cumulative step plots with gnuplot.
set -e

# Start from a clean slate: drop the tables and plots left by a previous run.
# The plot loop writes histograms-*.png and cumulative-*.png, which the older
# histogram-*.png pattern alone does not match, so both families are listed
# explicitly. "||:" keeps a failed cleanup from aborting the script under set -e.
rm -rf {cumulative,histograms,stats,metric-names}.{tsv,png} histogram-*.png \
    {histograms,cumulative}-*.png ||:
clickhouse-local --query "
-- Keep only metric changes that are statistically significant: the diff has to
-- be finite and larger in magnitude than the stat_threshold of the same row.
create view metric_stats_filtered as
    select *
    from file(
        'report/query-metric-stats.tsv',
        TSVWithNamesAndTypes,
        'metric_name text, left float, right float, diff float, stat_threshold float, test text, query_index int, query_display_name text'
    )
    where abs(diff) > stat_threshold
        and isFinite(diff)
;
-- Per-metric summary of the filtered diffs, written to stats.tsv.
-- Note the column names: min and max are the 0.1 and 0.99 quantiles of diff,
-- not the true extremes, and med is the 0.5 quantile. All of them, and avg,
-- are rounded down to 3 decimal places.
create table stats engine File(TSV, 'stats.tsv') as
    select
        count(*) num_queries,
        (arrayMap(x -> floor(x, 3), quantiles(0.1, 0.5, 0.99)(diff)) as q)[1] min,
        q[2] med,
        q[3] max,
        floor(avg(diff), 3) avg,
        metric_name,
        -- sumMap counts the rows per test; the (test, count) pairs are sorted by
        -- count descending and the first three are kept
        arraySlice(
            arraySort(x -> -x.2,
                arrayMap(x, y -> (x, y),
                    (sumMap([test], [1]) as sm).1, sm.2)),
            1, 3) top_changed_tests
    from metric_stats_filtered
    -- no row filter here: metric_stats_filtered already keeps only finite diffs
    -- with abs(diff) > stat_threshold, and repeating it risks the two drifting
    group by metric_name
    -- keep metrics with enough data points, a visible spread between the outer
    -- quantiles, and a non-negligible average or median change
    having num_queries > 10 and q[3] - q[1] > 0.1
        and (abs(avg) > 0.01 or abs(med) > 0.01)
;
-- Print the summary to stdout, ordered by the med column, largest first.
select *
from stats
order by med desc
format PrettySpace;

-- Names of the metrics that made it into stats. The file is read back by the
-- shell loop that draws the plots, and the table is used to filter histograms.
create table metric_names engine File(TSV, 'metric-names.tsv') as
    select distinct metric_name
    from stats;
-- Per-metric step data for gnuplot, written to histograms.tsv as rows of
-- (metric_name, step_left, step_height). Only metrics present in metric_names
-- are included.
create table histograms engine File(TSV, 'histograms.tsv') as
-- limit histogram to [lt, rt], put outliers into two marginal buckets
-- (eps is the width given to each of the two outlier buckets)
with -0.8 as lt, 0.8 as rt, 0.02 as eps,
diff as dd,
-- leftmost bucket (-inf, lt), stored as (left edge, right edge, row count)
(lt - eps, lt, toFloat64(countIf(dd < lt))) as leftmost,
-- rightmost bucket (rt, +inf), stored as (left edge, right edge, row count)
(rt, rt + eps, toFloat64(countIf(dd > rt))) as rightmost,
-- histogram for [lt, rt] with at most 50 buckets; per the ClickHouse docs each
-- element is a (lower, upper, height) tuple
histogramIf(50)(dd, lt <= dd and dd <= rt) as hist,
-- last in-range bucket; its upper edge is where the in-range steps end
hist[length(hist)] as rightmost_hist_bucket,
-- steps for gnuplot -- leftmost x coord + y coord.
-- outlier buckets and first and last histogram bucket may not be
-- adjacent, hence the need to zero out the steps.
-- arrayJoin turns the combined array into one output row per step.
arrayJoin(
arrayConcat(
arrayMap(x -> (x.1, x.3), hist),
[(leftmost.1, leftmost.3),
(leftmost.2, 0),
(rightmost_hist_bucket.2, 0),
(rightmost.1, 0),
(rightmost.1, rightmost.3)
-- do not add this last tick because it will break
-- the cumulative graph
-- (rightmost.2, rightmost.3)
])) as steps
select metric_name, steps.1 step_left, steps.2 step_height
from metric_stats_filtered
where metric_name in metric_names
group by metric_name
-- same order the cumulative query expects: left to right within a metric
order by metric_name, step_left, step_height
;
-- Running total of the step heights per metric, written to cumulative.tsv as
-- rows of (metric_name, left, height).
-- The steps are sorted inside the aggregate instead of in an ordered subquery:
-- groupArray does not promise to receive rows in subquery order, and
-- arrayCumSum is only meaningful when the steps go from left to right.
-- Sorting (step_left, step_height) tuples also pins down the order of steps
-- that share the same step_left, matching the order by of histograms.
create table cumulative engine File(TSV, 'cumulative.tsv') as
    select * from (
        with arraySort(groupArray(tuple(step_left, step_height))) as sorted_steps
        select metric_name,
            arrayMap(s -> s.1, sorted_steps) as left,
            arrayCumSum(arrayMap(s -> s.2, sorted_steps)) height
        from histograms
        group by metric_name)
    -- unfold the two parallel arrays back into one row per step
    array join left, height
;
"
# Whitespace-separated metric names selected by the stats query. Read once and
# reused for every variant. The list is deliberately expanded unquoted in the
# inner loop so that it splits into one word per metric.
metric_names=$(cat metric-names.tsv)

# render_steps_plot PLOT_COMMAND PNG_PATH
# Runs gnuplot with the shared terminal/grid/key preamble followed by
# PLOT_COMMAND and writes the PNG that gnuplot prints on stdout to PNG_PATH.
render_steps_plot() {
    local plot_command=$1
    local png_path=$2
    gnuplot -e "
set terminal png size 960,540;
set datafile missing NaN;
set grid;
set key left;
$plot_command
" > "$png_path"
}

# In every plot command column 2 of the .tsv is x, and column 3 is y only on
# rows whose column 1 equals the metric name; other rows yield NaN, which the
# preamble declares as missing data.
for variant in histograms cumulative
do
    # One picture per metric.
    for m in $metric_names
    do
        render_steps_plot "plot '$variant.tsv' using 2:(stringcolumn(1) eq '$m' ? \$3 : NaN) with steps title '$m' noenhanced;" "$variant-$m.png" &
    done

    # All selected metrics on a single picture.
    render_steps_plot "plot for [m in '$metric_names'] '$variant.tsv' using 2:(stringcolumn(1) eq m ? \$3 : NaN) with steps title m noenhanced;" "$variant-all.png" &

    # A fixed, hand-picked subset of metrics on a single picture.
    render_steps_plot "plot for [m in 'SoftPageFaults client_time server_time SystemTimeMicroseconds'] '$variant.tsv' using 2:(stringcolumn(1) eq m ? \$3 : NaN) with steps title m noenhanced;" "$variant-custom.png" &
done

# Block until every background gnuplot job has finished. A bare wait returns 0
# even when a job failed, so a broken plot does not fail the script.
wait
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment