Skip to content

Instantly share code, notes, and snippets.

@pdeffebach
Last active March 11, 2020 14:58
Show Gist options
  • Save pdeffebach/fd55c965b59ac783944d00e01bd8ace3 to your computer and use it in GitHub Desktop.
Performance of skipmissings
using BenchmarkTools, Missings
using Printf  # stdlib; provides `@sprintf`, which `@custombtime` expands to
"""
    make_x_y(N, p)

Build the benchmark inputs: two length-`N` vectors in which each entry is `missing`
when a `rand()` draw falls below `p`, and a second `rand()` draw otherwise.

Returns a named tuple with fields
- `x`, `y`: the raw vectors,
- `sx`: `skipmissing(x)`,
- `mx`, `my`: the pair returned by `skipmissings(x, y)`,
- `cx`, `cy`: copies of `x` and `y` in which every `missing` entry has been replaced
  by a fresh `rand()` draw.
"""
function make_x_y(N, p)
    # One draw decides "missing or not"; a second draw supplies the value if kept.
    draw = _ -> rand() < p ? missing : rand()
    x = map(draw, 1:N)
    y = map(draw, 1:N)

    sx = skipmissing(x)
    mx, my = skipmissings(x, y)

    # Missing-free baselines with the same length as the raw vectors.
    cx = [ismissing(v) ? rand() : v for v in x]
    cy = [ismissing(v) ? rand() : v for v in y]

    return (x = x, y = y, sx = sx, mx = mx, my = my, cx = cx, cy = cy)
end
"""
    testperfzip(X, Y)

Accumulate the sum of pairwise products over `zip(X, Y)`.

The accumulator starts at `zero(eltype(X)) * zero(eltype(Y))`, which is also the value
returned when the zipped iterator is empty. Nothing is filtered here: a `missing` on
either side propagates into the result.
"""
function testperfzip(X, Y)
    total = zero(eltype(X)) * zero(eltype(Y))
    for (a, b) in zip(X, Y)
        total = total + a * b
    end
    return total
end
"""
    testperfzip_manual(X, Y)

Accumulate the sum of pairwise products over `zip(X, Y)`, ignoring every pair in which
either element is `missing`.

The accumulator is initialised from the non-missing element types of `X` and `Y`, so
that initial value is also what is returned when no pair contributes.
"""
function testperfzip_manual(X, Y)
    TX = Missings.nonmissingtype(eltype(X))
    TY = Missings.nonmissingtype(eltype(Y))
    total = zero(TX) * zero(TY)
    for (a, b) in zip(X, Y)
        # Drop the pair as soon as either side is missing.
        (a === missing || b === missing) && continue
        total += a * b
    end
    return total
end
"""
    manual_sum_view(x, y)

Sum the entries of `x` at positions where neither `x` nor `y` is `missing`, selecting
them through a `view` with a logical mask.
"""
function manual_sum_view(x, y)
    # `true` exactly where both inputs hold a value.
    keep = .!(ismissing.(x) .| ismissing.(y))
    return sum(view(x, keep))
end
"""
    manual_sum_index(x, y)

Sum the entries of `x` at positions where neither `x` nor `y` is `missing`, selecting
them by logical indexing (`x[mask]`) rather than through a view.
"""
function manual_sum_index(x, y)
    # `true` exactly where both inputs hold a value.
    keep = .!(ismissing.(x) .| ismissing.(y))
    return sum(x[keep])
end
# Threshold passed to `make_x_y`: an entry becomes `missing` when `rand() < p`.
p = 0

# Benchmark inputs for both problem sizes (the small one is generated first).
t_100, t_10_000 = make_x_y(100, p), make_x_y(10_000, p)
"""
    @custombtime expr

Time `expr` with `@belapsed` and print the result formatted with `%.10e`, preceded by a
space and followed by `" seconds"` and an empty line.
"""
macro custombtime(expr)
    # NOTE(review): `expr` is spliced in without `esc`, so its names are looked up in the
    # module that defines this macro. That matches how this script uses it; confirm
    # before reusing the macro from another module or inside a function.
    return quote
        println(" ", @sprintf("%.10e", @belapsed $expr), " seconds\n")
    end
end
# ---------------------------------------------------------------------------
# Benchmark driver: prints one timing per (kernel, input size) combination.
# ---------------------------------------------------------------------------
println("============")
println("Proportion missing: $p")
println("============\n")

# Sum of `x`: jointly-skipping iterator vs. a plain missing-free vector vs. two
# hand-written masking strategies.
println("Sum:")
println("==========")
println("N = 100: skipmissings")
@custombtime sum($t_100.mx)
println("N = 100: vector")
@custombtime sum($t_100.cx)
println("N = 100: manual sum with view")
@custombtime manual_sum_view($t_100.x, $t_100.y)
println("N = 100: manual sum with index")
@custombtime manual_sum_index($t_100.x, $t_100.y)
println("N = 10_000: skipmissings")
@custombtime sum($t_10_000.mx)
println("N = 10_000: vector")
@custombtime sum($t_10_000.cx)
println("N = 10_000: manual sum with view")
@custombtime manual_sum_view($t_10_000.x, $t_10_000.y)
println("N = 10_000: manual sum with index")
@custombtime manual_sum_index($t_10_000.x, $t_10_000.y)
println("")

# Zipped sum of products: the same three-way comparison for a two-input kernel.
println("Zip")
println("==========")
println("N = 100: skipmissings")
@custombtime testperfzip($t_100.mx, $t_100.my)
println("N = 100: vector")
@custombtime testperfzip($t_100.cx, $t_100.cy)
println("N = 100: manual skipping missings")
# The raw vectors must go through the kernel that checks for `missing` itself;
# `testperfzip` would time the unchecked loop (and propagate `missing` once p > 0).
@custombtime testperfzip_manual($t_100.x, $t_100.y)
println("N = 10_000: skipmissings")
@custombtime testperfzip($t_10_000.mx, $t_10_000.my)
println("N = 10_000, vector")
@custombtime testperfzip($t_10_000.cx, $t_10_000.cy)
println("N = 10_000: manual skipping missings")
@custombtime testperfzip_manual($t_10_000.x, $t_10_000.y)
println("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment