Last active
March 11, 2020 14:58
-
-
Save pdeffebach/fd55c965b59ac783944d00e01bd8ace3 to your computer and use it in GitHub Desktop.
Performance of skipmissings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Printf
using BenchmarkTools, Missings
"""
    make_x_y(N, p)

Build the benchmark fixtures for vectors of length `N`.

Each entry of `x` and `y` is `missing` when a uniform draw falls below `p`, and a
fresh uniform draw otherwise.

Returns a named tuple with:
- `x`, `y`: the generated vectors;
- `sx`: `skipmissing(x)`;
- `mx`, `my`: the pair of iterators returned by `skipmissings(x, y)`;
- `cx`, `cy`: copies of `x` and `y` in which every `missing` is replaced by a new
  random number.
"""
function make_x_y(N, p)
    # One draw decides whether the entry is missing; a second draw supplies the value.
    draw() = rand() < p ? missing : rand()

    x = [draw() for _ in 1:N]
    y = [draw() for _ in 1:N]

    sx = skipmissing(x)
    mx, my = skipmissings(x, y)

    cx = [ismissing(v) ? rand() : v for v in x]
    cy = [ismissing(v) ? rand() : v for v in y]

    return (x = x, y = y, sx = sx, mx = mx, my = my, cx = cx, cy = cy)
end
"""
    testperfzip(X, Y)

Accumulate the sum of element-wise products of `X` and `Y`, iterating both in
lockstep with `zip`. The accumulator starts at
`zero(eltype(X)) * zero(eltype(Y))`. No special handling of `missing` is done.
"""
function testperfzip(X, Y)
    acc = zero(eltype(X)) * zero(eltype(Y))
    for (a, b) in zip(X, Y)
        acc = acc + a * b
    end
    return acc
end
"""
    testperfzip_manual(X, Y)

Accumulate the sum of element-wise products of `X` and `Y`, skipping every position
at which either element is `missing`. The accumulator starts at the product of the
zeros of the non-missing element types of `X` and `Y`.
"""
function testperfzip_manual(X, Y)
    TX = Missings.nonmissingtype(eltype(X))
    TY = Missings.nonmissingtype(eltype(Y))
    acc = zero(TX) * zero(TY)
    for (a, b) in zip(X, Y)
        # Skip the pair as soon as either side is missing.
        (a === missing || b === missing) && continue
        acc += a * b
    end
    return acc
end
"""
    manual_sum_view(x, y)

Sum the entries of `x` at positions where neither `x` nor `y` is `missing`,
selecting them through a view of `x` indexed by a boolean mask.
"""
function manual_sum_view(x, y)
    # Fused broadcast: true where both inputs are present.
    keep = @. !ismissing(x) & !ismissing(y)
    return sum(@view x[keep])
end
"""
    manual_sum_index(x, y)

Sum the entries of `x` at positions where neither `x` nor `y` is `missing`,
selecting them by indexing `x` with a boolean mask (which copies the selection).
"""
function manual_sum_index(x, y)
    # Fused broadcast: true where both inputs are present.
    keep = @. !ismissing(x) & !ismissing(y)
    return sum(x[keep])
end
# Threshold passed to `make_x_y`: an entry becomes `missing` when a uniform draw
# falls below it.
p = 0

# Benchmark fixtures at the two input sizes, built in this order.
t_100, t_10_000 = make_x_y(100, p), make_x_y(10_000, p)
"""
    @custombtime expr

Benchmark `expr` with `@belapsed` and print the resulting time in scientific
notation with ten digits after the decimal point, followed by the word `seconds`
and a blank line.

`expr` is handed to `@belapsed` unchanged, so interpolation markers inside it are
processed by `@belapsed`.
"""
macro custombtime(expr)
    # `@sprintf` comes from the Printf standard library; qualify it so the
    # dependency is explicit (Printf must be loaded at the top of the file).
    return :(println(" ", Printf.@sprintf("%.10e", @belapsed $expr), " seconds\n"))
end
# Benchmark driver: prints a header, then times each variant at N = 100 and
# N = 10_000 using `@custombtime`.
println("============")
println("Proportion missing: $p")
println("============\n")

# --- Summation benchmarks ---------------------------------------------------
println("Sum:")
println("==========")
println("N = 100: skipmissings")
@custombtime sum($t_100.mx)
println("N = 100: vector")
@custombtime sum($t_100.cx)
println("N = 100: manual sum with view")
@custombtime manual_sum_view($t_100.x, $t_100.y)
println("N = 100: manual sum with index")
@custombtime manual_sum_index($t_100.x, $t_100.y)
println("N = 10_000: skipmissings")
@custombtime sum($t_10_000.mx)
println("N = 10_000: vector")
@custombtime sum($t_10_000.cx)
println("N = 10_000: manual sum with view")
@custombtime manual_sum_view($t_10_000.x, $t_10_000.y)
println("N = 10_000: manual sum with index")
@custombtime manual_sum_index($t_10_000.x, $t_10_000.y)
println("")

# --- Zipped product-sum benchmarks -------------------------------------------
println("Zip")
println("==========")
println("N = 100: skipmissings")
@custombtime testperfzip($t_100.mx, $t_100.my)
println("N = 100: vector")
@custombtime testperfzip($t_100.cx, $t_100.cy)
println("N = 100: manual skipping missings")
# The "manual" case must use the variant that actually checks for `missing`;
# plain `testperfzip` would propagate `missing` instead of skipping it.
@custombtime testperfzip_manual($t_100.x, $t_100.y)
println("N = 10_000: skipmissings")
@custombtime testperfzip($t_10_000.mx, $t_10_000.my)
println("N = 10_000, vector")
@custombtime testperfzip($t_10_000.cx, $t_10_000.cy)
println("N = 10_000: manual skipping missings")
@custombtime testperfzip_manual($t_10_000.x, $t_10_000.y)
println("")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment