Created
August 24, 2019 07:50
-
-
Save karajan9/3beeeff3c639e959e09aa166140927ce to your computer and use it in GitHub Desktop.
Collect and plot data from the Benchmarks Game website
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% | |
using HTTP
using Gumbo
using DataFrames
using StatsBase
using Statistics  # stdlib; home of `mean`, which the plotting cell calls
# %% | |
# Base URL of the per-benchmark performance pages; the page for a benchmark
# is fetched from `url * name * ".html"`.
url = "https://benchmarksgame-team.pages.debian.net/benchmarksgame/performance/"

# Page names (without the `.html` suffix) of the benchmarks to scrape.
benchmarks = [
    "revcomp",
    "regexredux",
    "fannkuchredux",
    "mandelbrot",
    "pidigits",
    "nbody",
    "spectralnorm",
    "fasta",
    "knucleotide",
    "binarytrees",
]
# %% | |
# Scrape every benchmark page and collect one tuple per successfully measured
# program:
#   (lang, factor, best, gz, log10(factor), log10(gz), benchmark, factor * gz)
# The vector is left untyped on purpose: `lang` is a `String` for bold entries
# and a `SubString` otherwise, and the tuples are handed to `DataFrame` as-is.
datatmp = []
for bm in benchmarks
    @show bm  # progress output, one line per page fetched
    res = HTTP.get(url * bm * ".html")
    html = parsehtml(String(res.body))
    body = html.root[2]

    # The results table is located purely by position in the document, so this
    # index path is tied to the site's markup. The first child of the table is
    # skipped (presumably the header row — confirm against the page).
    table = children(body[2][2][2][1])[2:end]
    for row in table
        # The first cell holds the factor relative to the fastest program.
        # For failed / bad-output entries the cell is either not numeric or
        # does not exist at all, so *any* error here means "skip this row".
        # `tryparse` alone would not cover the missing-cell case.
        factor = try
            parse(Float64, string(row[1][1]))
        catch
            continue
        end

        # The language cell is a single plain node for ordinary entries; the
        # best entry is wrapped in <strong> (and may carry extra child nodes).
        langtmp = children(row[2][1][1])
        best = !(length(langtmp) == 1 && !(langtmp[1] isa HTMLElement{:strong}))
        lang = if best
            # Take the text inside the first <strong> child.
            strongs = filter(n -> n isa HTMLElement{:strong}, langtmp)
            string(strongs[1][1])
        else
            # Keep only the first whitespace-separated word of the label.
            split(string(langtmp[1]))[1]
        end

        # Fifth cell, parsed as an integer; plotted later as the
        # "(gzipped) code size".
        gz = parse(Int, string(row[5][1]))

        # Just push everything in there, DataFrames will sort it out.
        # The log columns are precomputed because no `log` option was found
        # in DataVoyager.
        push!(datatmp, (lang, factor, best, gz, log10(factor), log10(gz),
                        bm, factor * gz))
    end
end
# %% | |
# Turn the scraped tuples into a table and give the columns meaningful names
# (same order as the tuples pushed into `datatmp`).
df = DataFrame(datatmp)
# NOTE(review): `names!` was deprecated in later DataFrames releases in favour
# of `rename!` — confirm against the DataFrames version this script runs on.
names!(df, [:lang, :factor, :best, :gz, :logfactor, :loggz, :benchmark,
            :factor_times_gz])

# A bunch of problems arise because the bold lang names don't fit with the
# normal ones. Sometimes there are also a bunch of non-ASCII chars in there
# which break stuff. Fixing everything afterwards was the simpler solution.
# Also we got like 8 Ruby versions, I just grouped them together.
#
# Each entry maps a name prefix to the canonical label written back into
# `df.lang`; entries are applied in order.
for (prefix, label) in [
    "Substrate" => "Substrate VM",
    "Python"    => "Python 3",
    "Node"      => "Node js",
    "Free"      => "Pascal",
    "Pharo"     => "Smalltalk",
    "VW"        => "Smalltalk",
    "Matz's"    => "Ruby",
    "Truffle"   => "Ruby",
    "JRuby"     => "Ruby",
]
    df[startswith.(df.lang, prefix), :lang] .= label
end
# %% | |
# Exploration!
using DataVoyager

# Dump the whole table to stdout as a quick sanity check.
println(df)

# Open the rows flagged `best` in Voyager, then the complete table.
df[df.best .== true, :] |> Voyager()
df |> Voyager()
# %% | |
using Plots

# Scatter plot of mean gzipped code size (x) against the geometric-mean
# slowdown factor (y), one annotated point per language.
plot(xlabel = "(gzipped) code size", ylabel = "factor vs. best (geom. mean)",
     legend = false)

best = df[df.best .== true, :]  # only the best entry for each language

# Iterate through each language and aggregate over all benchmarks.
for l in df.lang |> unique
    sel = best[best.lang .== l, :]
    # A language without any `best` row has nothing to aggregate.
    size(sel, 1) == 0 && continue

    factor = geomean(sel.factor)
    gz = mean(sel.gz)
    @show l, factor, gz

    # y is plotted in log10 space; the tick labels below map it back.
    scatter!([gz], [log10(factor)], label = l, ms = 5.0,
             series_annotations = [Plots.text(l, :bottom, 10)])
end

plot!(xlims = (0, 1600), ylims = (0, log10(100)))

# Manual ticks, otherwise it's hard to read. The axis itself stays linear:
# the data, the limits and the tick positions are all already in log10 space,
# so adding `yscale = :log10` would apply the logarithm twice (and 0 is not a
# valid limit on a log axis).
ys = [1, 3, 5, 10, 30, 50, 100]
plot!(yticks = (log10.(ys), ys))
plot!(dpi = 300)
# savefig(plotsdir("size vs factor all langs.png"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment