Skip to content

Instantly share code, notes, and snippets.

@akuzm

akuzm/scatter.sh Secret

Created August 19, 2021 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akuzm/bb28a442f882349e0a5ec2b5262b97d0 to your computer and use it in GitHub Desktop.
Save akuzm/bb28a442f882349e0a5ec2b5262b97d0 to your computer and use it in GitHub Desktop.
#!/bin/bash
set -e
rm -rf {cumulative,histograms,stats,metric-names,normalized}.{tsv,png} histogram-*.png *_scatter_*.png ||:
# Prepare the plot data with clickhouse-local for test $1, query index $2.
# Inputs:  analyze/query-metric-stats-denorm.tsv,
#          analyze/query-run-metrics-denorm.tsv,
#          unstable-run-traces.left.rep, unstable-run-traces.right.rep
# Outputs: metric-names.tsv and normalized.tsv in the current directory, plus
#          the filtered metric stats and the selected trace names on stdout.
clickhouse-local --verbose --query "
    -- Metric statistics of the requested test and query.
    create view query_metric_stats_denorm as
        select *
        from file(
            'analyze/query-metric-stats-denorm.tsv',
            TSVWithNamesAndTypes,
            'test text, query_index int, metric_name text, left float, right float, diff float, stat_threshold float'
        )
        where test = '$1' and query_index = $2
    ;

    -- Metric values of individual runs of the requested test and query.
    create view query_run_metrics_denorm as
        select *
        from file(
            'analyze/query-run-metrics-denorm.tsv',
            TSV,
            'test text, query_index int, metric_name text, version int, query_id text, metric_value float'
        )
        where test = '$1' and query_index = $2
    ;

    -- Keep the metrics whose diff is finite. Stricter filters are currently
    -- disabled: abs(diff) > stat_threshold, abs(diff) > 0.05, and
    -- isFinite(stat_threshold) and stat_threshold > 0.05.
    create view metric_stats_filtered as
        select *
        from query_metric_stats_denorm
        where isFinite(diff)
    ;

    select * from metric_stats_filtered order by diff desc format PrettySpace;

    create table metric_names engine File(TSV, 'metric-names.tsv') as
        select distinct metric_name
        from metric_stats_filtered
    ;

    -- The client_time metric of every run, exposed as run_time.
    create view run_times as
        select
            test,
            query_index,
            version,
            query_id,
            metric_value as run_time
        from query_run_metrics_denorm
        where metric_name = 'client_time'
    ;

    -- Trace metrics of the left side, reshaped to the column layout of
    -- query_run_metrics_denorm with version 0.
    create view unstable_run_traces_left as
        select
            test,
            query_index,
            metric as metric_name,
            0 as version,
            query_id,
            value as metric_value
        from file(
            'unstable-run-traces.left.rep',
            TSVWithNamesAndTypes,
            'test text, query_index int, query_id text, value float, metric text'
        )
    ;

    -- Same for the right side, with version 1.
    create view unstable_run_traces_right as
        select
            test,
            query_index,
            metric as metric_name,
            1 as version,
            query_id,
            value as metric_value
        from file(
            'unstable-run-traces.right.rep',
            TSVWithNamesAndTypes,
            'test text, query_index int, query_id text, value float, metric text'
        )
    ;

    create view unstable_run_traces as
        select *
        from (
            select * from unstable_run_traces_left
            union all
            select * from unstable_run_traces_right
        )
        where test = '$1' and query_index = $2
    ;

    -- At most 20 trace names, taken in order of decreasing population standard
    -- deviation of their values, among those where it exceeds 0.1.
    create view interesting_trace_names as
        select metric_name
        from unstable_run_traces
        group by test, query_index, metric_name
        having (stddevPop(metric_value) as d) > 0.1
        order by d desc
        limit 20
    ;

    select * from interesting_trace_names;

    create view interesting_metrics as
        select * from unstable_run_traces
        where metric_name in interesting_trace_names
        union all
        select * from query_run_metrics_denorm
        where metric_name in metric_names
    ;

    -- Every joined row produces two output rows: the metric value itself and
    -- the value divided by run_time. In the name, single quotes and spaces
    -- become underscores and the result is cut to 100 characters.
    create table normalized engine File(TSV, 'normalized.tsv') as
        with
            substr(
                replaceAll(replaceAll(metric_name, '\'', '_'), ' ', '_'),
                1, 100
            ) as name_escaped,
            arrayJoin([
                (name_escaped, metric_value),
                (name_escaped || '_per_client_time', metric_value / run_time)
            ]) as metric_tuple
        select
            test,
            query_index,
            metric_tuple.1 as name,
            version,
            query_id,
            metric_tuple.2 as value,
            run_time
        from interesting_metrics
        join run_times
            using (test, query_index, version, query_id)
    ;
"
#for metric in $(cat metric-names.tsv)
for metric in $(cat normalized.tsv | cut -f3 | sort | uniq)
do
gnuplot -e "
set terminal png size 960,540;
set datafile missing NaN;
set grid;
set key left;
set yrange [0:2];
unset ytics;
set xrange [0:*];
set title '$1, query no. $2: $metric on X axis w/random jitter on Y axis' noenhanced;
plot for [v in '0 1'] 'normalized.tsv'
using (strcol(4) eq v && strcol(1) eq '$1' && strcol(2) eq '$2' && strcol(3) eq '$metric' ? \$6 : NaN):(v + 0.5 + (rand(0) - 0.5) / 2)
with points pt v eq '0' ? 9 : 11
title v eq '0' ? 'Left' : 'Right' noenhanced;
" > "$1_$2_scatter_$metric.png" &
gnuplot -e "
set terminal png size 960,540;
set datafile missing NaN;
set grid;
set key top center;
set yrange [0:*];
set title '$1, query no. $2: plot of $metric to client_time' noenhanced;
set xlabel 'client_time, s' noenhanced;
set ylabel '$metric' noenhanced;
plot for [v in '0 1'] 'normalized.tsv'
using 7:(strcol(4) eq v && strcol(1) eq '$1' && strcol(2) eq '$2' && strcol(3) eq '$metric' ? \$6 : NaN)
with points pt v eq '0' ? 9 : 11
title v eq '0' ? 'Old' : 'New' noenhanced;
" > "$1_$2_scatter_2d_$metric.png" &
done
wait
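# Alternative form of the scatter-plot using clause (not in use):
#using (strcol(4) eq v and strcol(1) eq '$1' and strcol(2) eq '$2' and strcol(3) eq '$3' ? \$6 : NaN):(v + rand(0))
# Sample rows of analyze/query-run-metrics-denorm.tsv
# (columns: test, query_index, metric_name, version, query_id, metric_value):
#akuzm@akuzm-nix:~/w54$ head analyze/query-run-metrics-denorm.tsv
#sum_map 0 ArenaAllocBytes 0 sum_map.query0.run0 4096
#sum_map 0 ArenaAllocBytes 0 sum_map.query0.run1 4096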
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment