Last active
February 10, 2021 16:04
-
-
Save ManuelBlanc/2fca670ac3c0c9f24c4edeaf30ed12a7 to your computer and use it in GitHub Desktop.
ubmark.lua -- A collection of Lua microbenchmarks.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- ubmark.lua -- A collection of Lua microbenchmarks | |
-- Targeted at LuaJIT 2.1.0-beta3. | |
-- | |
-- References: | |
-- https://cr.yp.to/talks/2015.04.16/slides-djb-20150416-a4.pdf | |
-- https://github.com/google/caliper/wiki/JavaMicrobenchmarks | |
-- http://www.ellipticgroup.com/html/benchmarkingArticle.html | |
-- http://hugoduncan.org/criterium/ | |
-- http://www.serpentine.com/criterion/ | |
-- https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test | |
-- https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test | |
local bmark = (function() -- Local wrapper. | |
local gc, pairs = collectgarbage, pairs | |
local abs, exp, floor, sqrt = math.abs, math.exp, math.floor, math.sqrt | |
local concat, sort = table.concat, table.sort | |
local format, match, sub = string.format, string.match, string.sub | |
local write = io.write | |
local clock = os.clock -- Can be overridden. | |
-- Measure the clock time of repeteadly executing a thunk. | |
local function timeR(reps, thunk, init) | |
local ud = init and init(reps) | |
local t0 = clock() | |
for i=1, reps, 16 do -- Unrolled. | |
thunk(ud, i ) thunk(ud, i+ 1) thunk(ud, i+ 2) thunk(ud, i+ 3) | |
thunk(ud, i+ 4) thunk(ud, i+ 5) thunk(ud, i+ 6) thunk(ud, i+ 7) | |
thunk(ud, i+ 8) thunk(ud, i+ 9) thunk(ud, i+10) thunk(ud, i+11) | |
thunk(ud, i+12) thunk(ud, i+13) thunk(ud, i+14) thunk(ud, i+15) | |
end | |
return clock() - t0 | |
end | |
-- Choose a suitable number of repetitions to profile a thunk. | |
local function pickR(thunk, init, tolerance) | |
local reps = 1024 | |
local n = 0 | |
for _=0, 10 do | |
reps = reps*(1 + exp(-n)) | |
if timeR(reps, thunk, init) > 1000e-6 then -- Tolerance in useconds. | |
if n == 5 then break end | |
n = n + 1 | |
elseif n > 0 then n = n - 1 | |
end | |
end | |
return reps - reps % 16 | |
end | |
-- Display a histogram for the given results. [ UNUSED ] | |
--[[ | |
local function histogram(r, bn, a, b) | |
local b = {} for i=1, bn do b[i] = 0 end | |
for i=1, #r do | |
local j = math.min(1+floor(r[i]*a + b), bn) | |
b[j] = b[j] + 1 | |
end | |
print("Histogram: "..r.case) | |
for i=1, bn do print(format("%2d| %s", i, string.rep("@", b[i]))) end | |
print "" | |
end | |
--]] | |
-- Measure the time and summarize executing a thunk a repeated number of times. | |
local function timeS(series, reps, thunk, init) | |
assert(series % 20 == 0 and reps % 16 == 0) | |
local r = {nil,nil,nil,nil} | |
for i=1, series do | |
gc("collect") gc("stop") if jit then jit.flush() end-- Collecting restarts the GC. | |
local t = timeR(reps, thunk, init) / reps | |
r[i] = t -- Insert-sort. | |
while i > 1 and r[i] < r[i-1] do r[i-1], r[i], i = t, r[i-1], i-1 end | |
end | |
gc("restart") | |
local a, b = 0.05*series, 0.95*series -- Trim both 5th quantiles. | |
local n = 0.90*series | |
local x = 0 -- Mean. Calculated after sorting to squeeze precision. | |
for i=a, b do x = x + r[i] end | |
x = x / n | |
local s2 = 0 -- Variance. | |
for i=a, b do local y = x - r[i] s2 = s2 + y*y end | |
r.mean = x | |
r.ops = x > 1e-10 and floor(1/x) or 0 | |
r.var = s2 / (n-1) | |
r.stdev = sqrt(r.var) | |
r.min = r[1] -- Percentile-based stats. | |
r.median = r[0.50*series] | |
r.q1 = r[0.25*series] | |
r.q3 = r[0.75*series] | |
r.iqr = r.q3 - r.q1 | |
return r | |
end | |
local function fmtSI(u, m) | |
if m >= 1 then | |
for i=0, 5 do | |
if m < 1000 then | |
return format("%.3f %s%s", m, sub("kMGTP", i, i), u) | |
end | |
m = m/1000 | |
end | |
return "inf" | |
elseif m > 0 then | |
for i=1, 5 do | |
m = m*1000 | |
if m >= 1 then | |
return format("%.3f %s%s", m, sub("munpf", i, i), u) | |
end | |
end | |
end | |
return "0.000 "..u | |
end | |
-- Compute the p-value for a normal difference. | |
local function erf(x) | |
x = abs(x)*2^-0.5 | |
local t = 1/(1+0.3275911*x) -- A&S Handbook of Mathematical Functions, Formula 7.1.26. | |
return (((((1.061405429*t-1.453152027)*t)+1.421413741)*t-0.284496736)*t+0.254829592)*t*exp(-x*x) | |
end | |
local function cmp_mean(a, b) return a.mean < b.mean end | |
local stats = { | |
"case" , format, "%s", | |
"mean" , fmtSI, "s", | |
"stdev" , fmtSI, "s", | |
"ops" , fmtSI, "Hz", | |
"reldiff", format, "%+.1f%%", | |
"pvalue" , format, "%.6f%%", | |
"tscore" , format, "%g", | |
"df" , format, "%d", | |
"min" , fmtSI, "s", | |
"median" , fmtSI, "s", | |
"iqr" , fmtSI, "s", | |
"reps" , fmtSI, "", | |
} | |
local skip_first = { reldiff=1, pvalue=1, tscore=1, df=1 } | |
-- Run a benchmark. | |
local function bmark(def) | |
local t0 = clock() | |
local data = {} | |
local series, init = def.series or 100, def.init | |
for case, thunk in pairs(def.cases) do | |
local reps = def.reps or pickR(thunk, init) | |
local r = timeS(series, reps, thunk, init) | |
r.case = case | |
r.reps = reps | |
data[#data+1] = r | |
end | |
sort(data, cmp_mean) | |
local b = data[1] -- Best. | |
for j=1, #data do | |
local r = data[j] | |
local v1, v2 = r.var, b.var | |
local v1_v2 = v1 + v2 | |
r.tscore = (r.mean - b.mean) * sqrt(series / v1_v2) | |
r.df = abs(floor(v1_v2*v1_v2 / (v1*v1 + v2*v2) * (series-1) + 0.5)) | |
r.reldiff = 100 * (r.mean/b.mean - 1) | |
r.pvalue = 100 * erf(r.tscore) | |
end | |
local duration = clock() - t0 | |
-- for i=1, #data do local r = data[i] histogram(r, 8, 8/(data[1][#r]-r[1])) end | |
write("\n", def.name or "no name!", "\n") | |
for i=1, #stats, 3 do | |
local key = stats[i] | |
local fmt = stats[i+1] | |
local ctx = stats[i+2] | |
write(format("%7s", key)) | |
for j=1, #data do | |
if j == 1 and skip_first[key] then write(", ") | |
else write(format(",%16s", fmt(ctx, data[j][key]))) | |
end | |
end | |
write("\n") | |
end | |
end | |
local queue = {} | |
local queue_only = false | |
local L = { | |
__call = bmark, | |
q = function(def) -- Queue a benchmark. | |
if def.off then return end | |
if def.only then queue_only = true end | |
queue[#queue+1] = def | |
end, | |
run = function() -- Run the queue. | |
for i=1, #queue do | |
local def = queue[i] | |
if queue_only ~= not def.only then | |
bmark(def) | |
end | |
end | |
end, | |
use_sys_clk = function() | |
local has_ffi, ffi = pcall(require, "ffi") | |
if not has_ffi then return nil end | |
local tonumber, C = tonumber, ffi.C | |
if jit.os == "Windows" then | |
ffi.cdef[[ | |
int QueryPerformanceFrequency(long long*); | |
int QueryPerformanceCounter(long long*); | |
]] | |
local freq = ffi.new("long long[1]") | |
local cnt = ffi.new("long long[1]") | |
C.QueryPerformanceFrequency(freq) | |
clock = function() | |
C.QueryPerformanceCounter(cnt); | |
return tonumber(freq[0] * cnt[0]) | |
end | |
return true | |
elseif jit.os == "Linux" then | |
ffi.cdef[[ | |
struct timespec { | |
uint64_t tv_sec; | |
uint64_t tv_nsec; | |
}; | |
int clock_gettime(int, struct timespec*); | |
]] | |
local tm = ffi.new("struct timespec[1]") | |
clock = function() | |
C.clock_gettime(1, tm) -- CLOCK_MONOTONIC. | |
return tonumber(tm[0].tv_sec) + tonumber(tm[0].tv_nsec)*1e-9 | |
end | |
return true | |
end | |
end | |
} | |
return setmetatable(L, L) | |
end)() -- /Local wrapper. | |
------------------------------------------------------------------------------- | |
local function ctrl_nop(_, i) return end | |
bmark.q{ name = "control", | |
reps = 2^18, | |
cases = { | |
["1a"] = ctrl_nop, | |
["1b"] = ctrl_nop, | |
["2a"] = function() end, | |
["2b"] = function() end, | |
}, | |
} | |
local sqrt = math.sqrt | |
bmark.q{ name = "bytecode:sqrt", | |
cases = { | |
["math.sqrt"] = function(_, i) return math.sqrt(i) end, | |
["uv-sqrt"] = function(_, i) return sqrt(i) end, | |
["pow-op"] = function(_, i) return i^0.5 end, | |
}, | |
} | |
bmark.q{ name = "bytecode:division", | |
cases = { | |
["mul"] = function(_, i) | |
local _ = i * (1/3) local _ = i * (1/3) | |
local _ = i * (1/3) local _ = i * (1/3) | |
local _ = i * (1/3) local _ = i * (1/3) | |
local _ = i * (1/3) local _ = i * (1/3) | |
end, | |
["div"] = function(_, i) | |
local _ = i / 3 local _ = i / 3 | |
local _ = i / 3 local _ = i / 3 | |
local _ = i / 3 local _ = i / 3 | |
local _ = i / 3 local _ = i / 3 | |
end, | |
}, | |
} | |
local max, huge, unpack = math.max, math.huge, unpack | |
bmark.q{ name = "bytecode:maximum", | |
init = function() | |
local t = { n = 100, aux = {} } | |
for i=1, t.n do t[i] = {math.random()} end | |
return t | |
end, | |
cases = { | |
["math.max"] = function(ud) | |
local m = -huge | |
for i=1, ud.n do m = max(m, ud[i][1]) end | |
end, | |
["lt-op"] = function(ud) | |
local m = -huge | |
for i=1, ud.n do | |
local x = ud[i][1] | |
if x > m then m = x end | |
end | |
end, | |
["unpack-max"] = function(ud) | |
local aux = ud.aux | |
for i=1, ud.n do aux[i] = ud[i][1] end | |
local _ = max(unpack(aux, 1, ud.n)) | |
end, | |
}, | |
} | |
bmark.q{ name = "bytecode:specialization", | |
cases = { | |
["no"] = function(_, i) -- MULVV | |
local five = 5 | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
local _ = i * five local _ = i * five | |
end, | |
["yes"] = function(_, i) -- MULVN | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
local _ = i * 5 local _ = i * 5 | |
end, | |
}, | |
} | |
bmark.q{ name = "storage:upvalue/table", | |
init = function() | |
local a | |
return { | |
a = a, | |
uset = function(x) a = x end, | |
tset = function(self, x) self.a = x end, | |
} | |
end, | |
cases = { | |
["upvalue"] = function(ud, i) ud.uset(i) end, | |
["table"] = function(ud, i) ud:tset(i) end, | |
}, | |
} | |
bmark.q{ name = "table:vivification", | |
init = function() | |
return { | |
t1 = {}, | |
t2 = setmetatable({}, { | |
__index = function(self, k) | |
self[k] = 0 | |
return 0 | |
end, | |
}) | |
} | |
end, | |
cases = { | |
["branch"] = function(ud) | |
local t = ud.t1 | |
for i=1, 128 do | |
local n = (i*37)%32 | |
t[n] = (t[n] or 0) + 1 | |
end | |
end, | |
["metatable"] = function(ud) | |
local t = ud.t2 | |
for i=1, 128 do | |
local n = (i*37)%32 | |
t[n] = t[n] + 1 | |
end | |
end, | |
}, | |
} | |
bmark.q{ name = "table:inflated", | |
cases = { | |
["no"] = function() | |
local a = {} | |
for i=1, 10 do a[i] = i end | |
end, | |
["yes"] = function() | |
local a = {nil,nil,nil,nil} | |
for i=1, 10 do a[i] = i end | |
end, | |
}, | |
} | |
bmark.q{ name = "bytecode:intermediate-slots", | |
init = function() | |
return { a = { b = { c = { d = { e = { f = 0 } } } } } } | |
end, | |
cases = { | |
["no"] = function(ud, i) | |
ud.a.b.c.d.e.f = i | |
end, | |
["yes"] = function(ud, i) | |
local a = ud.a | |
local b = a.b | |
local c = b.c | |
local d = c.d | |
local e = d.e | |
e.f = i | |
end, | |
}, | |
} | |
local next, ipairs, pairs = next, ipairs, pairs | |
bmark.q{ name = "table:iteration", | |
init = function() | |
local t = {} | |
for i=1, 1000 do t[i] = i end | |
return t | |
end, | |
cases = { | |
["for-n"] = function(ud) | |
local a = 0 | |
for i=1, #ud do a = a + ud[i] end | |
end, | |
["next"] = function(ud) | |
local a = 0 | |
for i in next, ud do a = a + ud[i] end | |
end, | |
["ipairs"] = function(ud) | |
local a = 0 | |
for _, v in ipairs(ud) do a = a + v end | |
end, | |
["pairs"] = function(ud) | |
local a = 0 | |
for _, v in pairs(ud) do a = a + v end | |
end, | |
}, | |
} | |
bmark.q{ name = "table:method/__call", | |
init = function() | |
local t = { __call = function() end } | |
return setmetatable(t, t) | |
end, | |
cases = { | |
["method"] = function(ud) | |
ud.__call() ud.__call() ud.__call() ud.__call() | |
ud.__call() ud.__call() ud.__call() ud.__call() | |
end, | |
["__call"] = function(ud) | |
ud() ud() ud() ud() | |
ud() ud() ud() ud() | |
end, | |
}, | |
} | |
local has_table_clear, clear_builtin = pcall(require, "table.clear") | |
if has_table_clear then | |
local function clear_lua(t) for k in pairs(t) do t[k] = nil end end | |
bmark.q{ name = "table:clear", | |
series = 20, | |
off = true, | |
init = function(reps) | |
local t = {} | |
for i=1, reps do | |
t[i] = {} | |
for j=1, 100 do | |
t[i][math.random()] = j | |
t[i][j] = j | |
end | |
end | |
return t | |
end, | |
cases = { | |
["builtin"] = function(ud, i) | |
clear_builtin(ud[i]) | |
end, | |
["lua"] = function(ud, i) | |
assert(ud[i], i) | |
clear_lua(ud[i]) | |
end, | |
} | |
} | |
end | |
bmark.q{ name = "storage:vararg", only = true, | |
init = function() | |
local index_lookup = { | |
one = 1, | |
two = 2, | |
three = 3, | |
four = 4, | |
five = 5, | |
six = 6, | |
seven = 7, | |
eight = 8, | |
} | |
local _arg_buffer = {} | |
local function read_from_vararg(...) | |
for i=1, select("#", ...) do | |
_arg_buffer[i] = select(i, ...) | |
end | |
local one = _arg_buffer[1] | |
local two = _arg_buffer[2] | |
local three = _arg_buffer[3] | |
local four = _arg_buffer[4] | |
end | |
local function read_from_indexed_array(t) | |
local one = t[ index_lookup.one ] | |
local two = t[ index_lookup.two ] | |
local three = t[ index_lookup.three ] | |
local four = t[ index_lookup.four ] | |
end | |
local function read_from_dictionary(t) | |
local one = t[ "one" ] | |
local two = t[ "two" ] | |
local three = t[ "three" ] | |
local four = t[ "four" ] | |
end | |
return { | |
index_lookup = index_lookup, | |
read_from_vararg = read_from_vararg, | |
read_from_indexed_array = read_from_indexed_array, | |
read_from_dictionary = read_from_dictionary, | |
array = {}, -- Reused. | |
dict = {}, -- Reused. | |
} | |
end, | |
cases = { | |
["array"] = function(ud) | |
ud.read_from_vararg("a", "b", 1, 2, 3, nil, false, "z") | |
end, | |
["lookup"] = function(ud, i) | |
local array, index_lookup = ud.array, ud.index_lookup | |
array[index_lookup.one ] = "a" | |
array[index_lookup.two ] = "b" | |
array[index_lookup.three] = 1 | |
array[index_lookup.four ] = 2 | |
array[index_lookup.five ] = 3 | |
array[index_lookup.six ] = nil | |
array[index_lookup.seven] = false | |
array[index_lookup.eight] = "z" | |
ud.read_from_indexed_array(array) | |
end, | |
["dict"] = function(ud, i) | |
local dict = ud.dict | |
dict.one = "a" | |
dict.two = "b" | |
dict.three = 1 | |
dict.four = 2 | |
dict.five = 3 | |
dict.six = nil | |
dict.seven = false | |
dict.eight = "z" | |
ud.read_from_dictionary(dict) | |
end, | |
}, | |
} | |
if jit then | |
local st = {jit.status()} | |
print(string.format([[ | |
Version : %s | |
Arch : %s | |
OS : %s | |
JIT : %s]], jit.version, jit.arch, jit.os, st[1] and table.concat(st, " ", 2) or "OFF")) | |
else | |
print(string.format("Version : %s", _VERSION)) | |
end | |
bmark.use_sys_clk() | |
bmark.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment