@pkkm
Created April 16, 2018 21:37
Testing the reliability of various statistics in the Criterion library.
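
analyze.R: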
#!/usr/bin/env Rscript
library(ggplot2)
library(reshape2)
library(pander)
data <- read.csv("results/results.csv", check.names=FALSE)
# Reorder columns for readability.
col_order <- c("Least-squares slope", "Theil-Sen slope",
"Mean", "Median of means",
"Minimum of means", "Quartile 1 of means", "Quartile 3 of means")
data <- data[, col_order]
molten <- melt(data)
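# `melt` stacks the columns into two: `variable` (the statistic's name) and
# `value` (its value), giving one row per measurement.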
# Assign categories to variables.
molten$type <- factor(
  "This should never be visible",
  levels=c("Central tendency", "Regression", "Other", "This should never be visible"))
molten <- within(molten, type[variable == "Least-squares slope" | variable == "Theil-Sen slope"] <- "Regression")
molten <- within(molten, type[variable == "Mean" | variable == "Median of means"] <- "Central tendency")
molten <- within(molten, type[variable == "Quartile 1 of means" | variable == "Quartile 3 of means" | variable == "Minimum of means"] <- "Other")
# Draw densities.
plot <- ggplot(molten, aes(x=value, color=variable)) +
  geom_density(adjust=0.5) +
  labs(x="Time [s]", y="Number of benchmarks (smoothed)", color="") +
  facet_wrap("type", scales="fixed", ncol=1)
ggsave("results/density.pdf", plot, device=cairo_pdf, width=8, height=6)
# Draw boxplots.
plot <- ggplot(molten, aes(x=variable, y=value)) +
  geom_boxplot() +
  labs(x="Statistic", y="Time [s]") +
  theme(axis.text.x=element_text(angle=25, hjust=1))
png(filename="results/boxplot.png", type="cairo", width=1100, height=1100, units="px", res=200)
print(plot)
dev.off()
# Data range as a single number (instead of vector of min and max).
range_num <- function(data) {
  return(diff(range(data)))
}
# Summarize the spread of the data in a table.
df <- data.frame()
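# Indexing row `ncol(data)` grows the empty data frame to one row per statistic.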
df[ncol(data),] <- NA
rownames(df) <- names(data)
iqr_rel <- apply(data, 2, IQR) / apply(data, 2, median)
df$`IQR/Median` <- sprintf("%.1f%%", unlist(iqr_rel * 100))
range_rel <- apply(data, 2, range_num) / apply(data, 2, median)
df$`Range/Median` <- sprintf("%.1f%%", unlist(range_rel * 100))
table <- pandoc.table.return(
  df, style="rmarkdown", justify=c("right", "left", "left"), emphasize.rownames=FALSE)
handle <- file("results/summary.md")
writeLines(table, handle)
close(handle)
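
benchmark.py: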
#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
import tempfile
import numpy
import scipy.stats
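
# format_s() mimics Criterion's human-readable output, e.g. format_s(1.234e-3)
# gives "1.234 ms" and format_s(5.67e-5) gives "56.70 μs".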
def format_s(seconds):
    """Format a time in seconds like Criterion does."""
    if seconds < 0:
        return "-" + format_s(-seconds)

    def format_with_prefix(seconds, prefix):
        """Format to 4 digits, even if they are trailing zeros."""
        if seconds >= 1e9:
            return "{:.4g} {}".format(seconds, prefix)
        for exponent in [3, 2, 1]:
            if seconds >= 10 ** exponent:
                return "{1:.{0}f} {2}".format(3 - exponent, seconds, prefix)
        return "{:.3f} {}".format(seconds, prefix)

    PREFIXES = [
        (0, ""),
        (-3, "m"),
        (-6, "μ"),
        (-9, "n"),
        (-12, "p"),
        (-15, "f"),
        (-18, "a")]
    for exponent, prefix in PREFIXES:
        if seconds >= 10 ** exponent:
            return format_with_prefix(seconds * 10 ** (-exponent), prefix + "s")
    return "{:g} s".format(seconds)
def format_row(a, b, c=""):
"""Format a row of output."""
return "{: <20} {: <10} {}".format(a, b, c)


def criterion_print_extra_stats(benchmark):
    """Print some extra statistics that Criterion doesn't provide.

    `benchmark` should be a parsed JSON object describing a single benchmark
    from Criterion's output (tested on Criterion 1.2.3)."""
    # Extract columns which are interesting and should be non-null.
    keys = benchmark["reportKeys"]
    indices = {name: index for index, name in enumerate(keys)}

    def process(datum):
        return {key: datum[indices[key]]
                for key in ["time", "cpuTime", "iters"]}
    measured = list(map(process, benchmark["reportMeasured"]))

    # Criterion repeatedly executes the benchmarked code in a loop with an
    # increasing number of iterations. `time` and `cpuTime` are totals for the
    # loop and `iters` is the number of iterations.
    mean_times = [datum["time"] / datum["iters"] for datum in measured]
    print(format_row(
        "quartiles of means",
        ", ".join(
            format_s(numpy.percentile(mean_times, p))
            for p in [25, 50, 75])))

    # Theil-Sen regression of time vs. number of iterations.
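    # (Theil-Sen fits the median of the slopes between all pairs of points,
    # so a few outlier measurements barely affect the estimate.)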
    slope, intercept, *_ = scipy.stats.theilslopes(
        [m["time"] for m in measured], [m["iters"] for m in measured])
    print(format_row(
        "Theil-Sen",
        format_s(slope),
        "(intercept: {})".format(format_s(intercept))))

    print(format_row("min of means", format_s(numpy.amin(mean_times))))


def criterion_benchmark(command, time_limit_s=None):
    """Benchmark a shell command using Criterion and print the results."""
    with tempfile.TemporaryDirectory(prefix="benchmark-") as dir_name:
        json_file = os.path.join(dir_name, "criterion-out.json")
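        # `bench` is a command-line tool that runs Criterion benchmarks on
        # shell commands; `--json` makes it also dump the raw measurements.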
bench_command = ["bench"]
if time_limit_s is not None:
bench_command += ["--time-limit", str(time_limit_s)]
bench_command += ["--json", json_file, "--", command]
process = subprocess.run(bench_command, stdout=subprocess.PIPE)
print(process.stdout.decode("utf-8").rstrip("\n"))
with open(json_file, "r") as f:
data = json.load(f)
data = data[2] # Skip the header.
assert len(data) == 1 # We're always doing a single benchmark.
criterion_print_extra_stats(data[0])
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("command", help="sh command to benchmark")
parser.add_argument(
"--time-limit", type=int, default=60,
help="time limit in seconds for the whole benchmark")
args = parser.parse_args()
criterion_benchmark(args.command, time_limit_s=args.time_limit)
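
Driver script (its filename isn't shown in the gist; it calls ./benchmark.py for each run, writes results/results.csv, and finishes with ./analyze.R, which produces density.pdf, boxplot.png and summary.md in results/):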
#!/usr/bin/env bash
command='bash -c "a=0; for i in {1..500000}; do (( a += RANDOM )); done"'
n_warmup_runs=5
n_benchmarks=60
single_benchmark_time=60
require_cmd_present() {
  for cmd in "$@"; do
    if ! command -v -- "$cmd" >/dev/null 2>&1; then
      printf "ERROR: Required command \`%s\` not found.\n" "$cmd" 1>&2
      exit 1
    fi
  done
}
require_cmd_present python3 grep cut tr sed bench Rscript
mkdir -p "results"
single_benchmark() {
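  # Turn benchmark.py's aligned human-readable output into one CSV row:
  # keep only the lines with statistics, drop the 21-character label column
  # and any parenthesized intercept, delete everything except digits, dots
  # and commas, then join the lines with commas and strip the trailing one.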
  ./benchmark.py "$command" --time-limit "$single_benchmark_time" |
    grep -E "time|mean|quartile|Theil|min" |
    cut -c 22- |
    cut -d\( -f1 |
    tr -cd ".,\n0-9" |
    tr "\n" "," |
    sed "s/,$//"
  printf "\n"
}
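# Run a few benchmarks to warm up the system (discarding their output), then
# write the CSV header followed by one row per real benchmark run.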
{
  for ((i=0; i<"$n_warmup_runs"; i++)); do
    single_benchmark >/dev/null
  done
  echo "Least-squares slope,Mean,Quartile 1 of means,Median of means,Quartile 3 of means,Theil-Sen slope,Minimum of means"
  for ((i=0; i<"$n_benchmarks"; i++)); do
    single_benchmark
  done
} >"results/results.csv"
./analyze.R