Skip to content

Instantly share code, notes, and snippets.

@devmotion
Created August 5, 2018 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save devmotion/0866639f85ffb305e0485f00ae7f4025 to your computer and use it in GitHub Desktop.
Save devmotion/0866639f85ffb305e0485f00ae7f4025 to your computer and use it in GitHub Desktop.
Interpolation benchmarks
using MuladdMacro, EllipsisNotation, BenchmarkTools
# In place
"""
    f_loop!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

Fill `out` in place with `y[j] + dt * (k[1][j]*b[1] + … + k[N][j]*b[N])` for every
`j in eachindex(out)` and return `out`.

The `N`-term sum is unrolled when the method body is generated, and `@muladd` is applied
to the whole loop. The loop runs under `@inbounds`, so `y` and every `k[i]` must be
indexable at each index of `out`.
"""
@generated function f_loop!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[i][j] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i][j] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @inbounds for j in eachindex(out)
            out[j] = y[j] + dt * $weighted
        end
        return out
    end
end
"""
    f_loop!(out, dt, y, k, b, idxs, ::Type{Val{N}})

Indexed variant of the in-place loop: for the `dest`-th element `src` of `idxs`, store
`y[src] + dt * (k[1][src]*b[1] + … + k[N][src]*b[N])` in `out[dest]`, then return `out`.

The `N`-term sum is unrolled when the method body is generated, and `@muladd` is applied
to the whole loop. The loop runs under `@inbounds`, so the entries of `idxs` must be
valid indices of `y` and of every `k[i]`.
"""
@generated function f_loop!(out, dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[i][src] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i][src] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @inbounds for (dest, src) in enumerate(idxs)
            out[dest] = y[src] + dt * $weighted
        end
        return out
    end
end
"""
    f_broadcast!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

Broadcast `y + dt * (k[1]*b[1] + … + k[N]*b[N])` into `out` and return `out`.

The `N`-term sum is unrolled when the method body is generated; `@.` dots every call and
the assignment, and `@muladd` is applied to the resulting expression.
"""
@generated function f_broadcast!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[i] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @. out = y + dt * $weighted
        return out
    end
end
"""
    f_broadcast!(out, dt, y, k, b, idxs, ::Type{Val{N}})

Indexed variant of the in-place broadcast: broadcast
`y[idxs] + dt * (k[1][idxs]*b[1] + … + k[N][idxs]*b[N])` into `out` and return `out`.

The indexing expressions are wrapped by `@views`; `@.` dots every call and the
assignment, and `@muladd` is applied to the resulting expression.
"""
@generated function f_broadcast!(out, dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[i][idxs] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i][idxs] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @views @. out = y[idxs] + dt * $weighted
        return out
    end
end
# Out of place
"""
    f_broadcast(dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

Return the broadcast result of `y + dt * (k[1]*b[1] + … + k[N]*b[N])` without writing
into a caller-supplied output.

The `N`-term sum is unrolled when the method body is generated; `@.` dots every call and
`@muladd` is applied to the resulting expression.
"""
@generated function f_broadcast(dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[i] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        return @muladd @. y + dt * $weighted
    end
end
"""
    f_broadcast(dt, y, k, b, idxs, ::Type{Val{N}})

Indexed out-of-place variant: return the broadcast result of
`y[idxs] + dt * (k[1][idxs]*b[1] + … + k[N][idxs]*b[N])`.

The indexing expressions are wrapped by `@views`; `@.` dots every call and `@muladd` is
applied to the resulting expression.
"""
@generated function f_broadcast(dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[i][idxs] * b[i]` term per `i in 1:N`, spliced into a single `+` call.
    terms = [:(k[$i][idxs] * b[$i]) for i in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        return @muladd @views @. y[idxs] + dt * $weighted
    end
end
# Display benchmark results
# `@bdisp call_expr`: print the callee of `call_expr`, benchmark `call_expr` with
# `BenchmarkTools.@benchmark`, display the resulting trial, and print a blank line.
#
# `expr` is handed to `@benchmark` unchanged, so any `$`-interpolations written at the
# call site are forwarded to `@benchmark`.
# NOTE(review): `expr` is spliced without `esc`, so names inside it are subject to macro
# hygiene — confirm this resolves as intended if the macro is used from another module.
macro bdisp(expr)
quote
# `expr.args[1]` is the first argument of the expression; for a call expression such as
# `f(args...)` that is the callee `f`, which is printed as a heading.
println($(expr.args[1]))
($BenchmarkTools.@benchmark $expr) |> display
println()
end
end
"""
    run_benchmark(n, N)

Benchmark `f_loop!`, `f_broadcast!` and `f_broadcast` on random data and display the
results.

# Arguments
- `n`: length of the random vectors `out`, `y` and of each `k[i]`.
- `N`: number of `k[i]` vectors and of coefficients in `b`; passed on as `Val{N}`.

Each function is benchmarked for three index selections: `nothing`, the explicit index
vector `collect(1:n)`, and the ellipsis `..` from EllipsisNotation. `f_loop!` is skipped
for `..` because its indexed method iterates over `idxs` with `enumerate`.
"""
function run_benchmark(n, N)
    @show n, N
    out = rand(n); y = rand(n); allidxs = collect(1:n); dt = rand()
    k = [rand(n) for _ in 1:N]; b = rand(N)
    for idxs in (nothing, allidxs, ..)
        @show idxs
        # Compare against the `..` constant itself rather than a hard-coded
        # `Val{:..}()`: the concrete value bound to `..` depends on the EllipsisNotation
        # release, and a stale literal would let `f_loop!` run (and fail) on `..`.
        idxs === (..) ||
            @bdisp f_loop!($out, $dt, $y, $k, $b, $idxs, $(Val{N}))
        @bdisp f_broadcast!($out, $dt, $y, $k, $b, $idxs, $(Val{N}))
        @bdisp f_broadcast($dt, $y, $k, $b, $idxs, $(Val{N}))
    end
end
# Run the benchmark suite on length-100 vectors for 2, 5, 10 and 15 summands.
foreach(N -> run_benchmark(100, N), (2, 5, 10, 15))
(n, N) = (100, 2)
idxs = nothing
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 178.517 ns (0.00% GC)
median time: 197.222 ns (0.00% GC)
mean time: 215.129 ns (3.40% GC)
maximum time: 49.360 μs (99.55% GC)
--------------
samples: 10000
evals/sample: 666
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 85.594 ns (0.00% GC)
median time: 99.060 ns (0.00% GC)
mean time: 107.006 ns (5.08% GC)
maximum time: 36.729 μs (99.63% GC)
--------------
samples: 10000
evals/sample: 956
f_broadcast
BenchmarkTools.Trial:
memory estimate: 976 bytes
allocs estimate: 4
--------------
minimum time: 649.410 ns (0.00% GC)
median time: 746.042 ns (0.00% GC)
mean time: 797.310 ns (6.68% GC)
maximum time: 221.633 μs (99.53% GC)
--------------
samples: 10000
evals/sample: 156
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 207.893 ns (0.00% GC)
median time: 225.582 ns (0.00% GC)
mean time: 239.669 ns (3.81% GC)
maximum time: 62.751 μs (99.58% GC)
--------------
samples: 10000
evals/sample: 560
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 256 bytes
allocs estimate: 10
--------------
minimum time: 383.901 ns (0.00% GC)
median time: 426.470 ns (0.00% GC)
mean time: 490.538 ns (7.03% GC)
maximum time: 168.830 μs (99.66% GC)
--------------
samples: 10000
evals/sample: 202
f_broadcast
BenchmarkTools.Trial:
memory estimate: 1.19 KiB
allocs estimate: 13
--------------
minimum time: 918.516 ns (0.00% GC)
median time: 1.020 μs (0.00% GC)
mean time: 1.298 μs (15.81% GC)
maximum time: 1.114 ms (99.87% GC)
--------------
samples: 10000
evals/sample: 31
idxs = Val{:..}()
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 160 bytes
allocs estimate: 4
--------------
minimum time: 141.498 ns (0.00% GC)
median time: 158.581 ns (0.00% GC)
mean time: 173.669 ns (7.34% GC)
maximum time: 40.422 μs (99.50% GC)
--------------
samples: 10000
evals/sample: 846
f_broadcast
BenchmarkTools.Trial:
memory estimate: 1.09 KiB
allocs estimate: 7
--------------
minimum time: 722.526 ns (0.00% GC)
median time: 831.504 ns (0.00% GC)
mean time: 912.200 ns (9.31% GC)
maximum time: 254.465 μs (99.55% GC)
--------------
samples: 10000
evals/sample: 137
(n, N) = (100, 5)
idxs = nothing
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 437.697 ns (0.00% GC)
median time: 482.343 ns (0.00% GC)
mean time: 499.144 ns (1.55% GC)
maximum time: 77.686 μs (99.34% GC)
--------------
samples: 10000
evals/sample: 198
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 373.800 ns (0.00% GC)
median time: 412.727 ns (0.00% GC)
mean time: 429.601 ns (1.72% GC)
maximum time: 74.530 μs (99.34% GC)
--------------
samples: 10000
evals/sample: 205
f_broadcast
BenchmarkTools.Trial:
memory estimate: 1.05 KiB
allocs estimate: 7
--------------
minimum time: 1.639 μs (0.00% GC)
median time: 1.840 μs (0.00% GC)
mean time: 2.454 μs (21.08% GC)
maximum time: 3.444 ms (99.86% GC)
--------------
samples: 10000
evals/sample: 10
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 499.773 ns (0.00% GC)
median time: 551.374 ns (0.00% GC)
mean time: 570.722 ns (1.36% GC)
maximum time: 78.803 μs (98.79% GC)
--------------
samples: 10000
evals/sample: 194
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 496 bytes
allocs estimate: 19
--------------
minimum time: 1.047 μs (0.00% GC)
median time: 1.236 μs (0.00% GC)
mean time: 1.787 μs (28.36% GC)
maximum time: 3.607 ms (99.91% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast
BenchmarkTools.Trial:
memory estimate: 1.52 KiB
allocs estimate: 25
--------------
minimum time: 2.301 μs (0.00% GC)
median time: 2.493 μs (0.00% GC)
mean time: 3.258 μs (19.15% GC)
maximum time: 3.931 ms (99.85% GC)
--------------
samples: 10000
evals/sample: 9
idxs = Val{:..}()
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 304 bytes
allocs estimate: 7
--------------
minimum time: 482.703 ns (0.00% GC)
median time: 535.344 ns (0.00% GC)
mean time: 608.793 ns (6.67% GC)
maximum time: 180.092 μs (99.62% GC)
--------------
samples: 10000
evals/sample: 195
f_broadcast
BenchmarkTools.Trial:
memory estimate: 1.33 KiB
allocs estimate: 13
--------------
minimum time: 1.790 μs (0.00% GC)
median time: 1.952 μs (0.00% GC)
mean time: 2.645 μs (21.31% GC)
maximum time: 3.525 ms (99.86% GC)
--------------
samples: 10000
evals/sample: 10
(n, N) = (100, 10)
idxs = nothing
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 1.076 μs (0.00% GC)
median time: 1.186 μs (0.00% GC)
mean time: 1.195 μs (0.00% GC)
maximum time: 4.528 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 1.02 KiB
allocs estimate: 42
--------------
minimum time: 1.343 μs (0.00% GC)
median time: 1.509 μs (0.00% GC)
mean time: 2.150 μs (25.13% GC)
maximum time: 3.582 ms (99.90% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast
BenchmarkTools.Trial:
memory estimate: 2.20 KiB
allocs estimate: 53
--------------
minimum time: 3.764 μs (0.00% GC)
median time: 4.188 μs (0.00% GC)
mean time: 5.135 μs (15.21% GC)
maximum time: 4.622 ms (99.83% GC)
--------------
samples: 10000
evals/sample: 8
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 1.179 μs (0.00% GC)
median time: 1.464 μs (0.00% GC)
mean time: 1.591 μs (0.00% GC)
maximum time: 5.853 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 1.30 KiB
allocs estimate: 52
--------------
minimum time: 2.161 μs (0.00% GC)
median time: 2.403 μs (0.00% GC)
mean time: 3.137 μs (20.31% GC)
maximum time: 4.085 ms (99.87% GC)
--------------
samples: 10000
evals/sample: 9
f_broadcast
BenchmarkTools.Trial:
memory estimate: 2.48 KiB
allocs estimate: 63
--------------
minimum time: 4.557 μs (0.00% GC)
median time: 5.047 μs (0.00% GC)
mean time: 6.110 μs (14.95% GC)
maximum time: 5.513 ms (99.86% GC)
--------------
samples: 10000
evals/sample: 7
idxs = Val{:..}()
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 1.56 KiB
allocs estimate: 54
--------------
minimum time: 1.655 μs (0.00% GC)
median time: 1.869 μs (0.00% GC)
mean time: 2.574 μs (24.63% GC)
maximum time: 3.726 ms (99.91% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast
BenchmarkTools.Trial:
memory estimate: 2.75 KiB
allocs estimate: 65
--------------
minimum time: 4.032 μs (0.00% GC)
median time: 4.489 μs (0.00% GC)
mean time: 5.638 μs (17.18% GC)
maximum time: 5.577 ms (99.83% GC)
--------------
samples: 10000
evals/sample: 7
(n, N) = (100, 15)
idxs = nothing
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 1.962 μs (0.00% GC)
median time: 2.154 μs (0.00% GC)
mean time: 2.163 μs (0.00% GC)
maximum time: 6.176 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 10
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 1.83 KiB
allocs estimate: 75
--------------
minimum time: 2.401 μs (0.00% GC)
median time: 2.622 μs (0.00% GC)
mean time: 3.412 μs (19.64% GC)
maximum time: 4.161 ms (99.91% GC)
--------------
samples: 10000
evals/sample: 9
f_broadcast
BenchmarkTools.Trial:
memory estimate: 3.17 KiB
allocs estimate: 91
--------------
minimum time: 5.987 μs (0.00% GC)
median time: 6.663 μs (0.00% GC)
mean time: 7.912 μs (13.44% GC)
maximum time: 6.188 ms (99.81% GC)
--------------
samples: 10000
evals/sample: 6
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]
f_loop!
BenchmarkTools.Trial:
memory estimate: 16 bytes
allocs estimate: 1
--------------
minimum time: 2.101 μs (0.00% GC)
median time: 2.309 μs (0.00% GC)
mean time: 2.328 μs (0.00% GC)
maximum time: 8.207 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 9
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 2.67 KiB
allocs estimate: 106
--------------
minimum time: 3.759 μs (0.00% GC)
median time: 4.157 μs (0.00% GC)
mean time: 5.172 μs (16.93% GC)
maximum time: 4.780 ms (99.86% GC)
--------------
samples: 10000
evals/sample: 8
f_broadcast
BenchmarkTools.Trial:
memory estimate: 4.05 KiB
allocs estimate: 123
--------------
minimum time: 7.376 μs (0.00% GC)
median time: 8.204 μs (0.00% GC)
mean time: 10.058 μs (16.27% GC)
maximum time: 9.521 ms (99.83% GC)
--------------
samples: 10000
evals/sample: 4
idxs = Val{:..}()
f_broadcast!
BenchmarkTools.Trial:
memory estimate: 1.80 KiB
allocs estimate: 59
--------------
minimum time: 2.522 μs (0.00% GC)
median time: 2.812 μs (0.00% GC)
mean time: 3.657 μs (20.07% GC)
maximum time: 4.318 ms (99.91% GC)
--------------
samples: 10000
evals/sample: 9
f_broadcast
BenchmarkTools.Trial:
memory estimate: 3.31 KiB
allocs estimate: 82
--------------
minimum time: 6.044 μs (0.00% GC)
median time: 6.717 μs (0.00% GC)
mean time: 8.288 μs (16.05% GC)
maximum time: 7.751 ms (99.86% GC)
--------------
samples: 10000
evals/sample: 5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment