Skip to content

Instantly share code, notes, and snippets.

@oxinabox
Last active October 11, 2020 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oxinabox/83bf01b3be7ff442cd13843ea7c029f3 to your computer and use it in GitHub Desktop.
Save oxinabox/83bf01b3be7ff442cd13843ea7c029f3 to your computer and use it in GitHub Desktop.
Conditionally setting a column in dataframes.jl
using DataFrames, BenchmarkTools, DataFramesMeta
"""
    eg_df(n=100_000)

Build an example `DataFrame` with `n` rows and three randomly filled columns:
`a` drawn from `1:10`, `b` drawn from `'a':'z'`, and `c` drawn from `'A':'Z'`.
"""
function eg_df(n=100_000)
    numbers = rand(1:10, n)
    lowers = rand('a':'z', n)
    uppers = rand('A':'Z', n)
    return DataFrame(a=numbers, b=lowers, c=uppers)
end
# for eachrow loop
# 7.132 ms (239156 allocations: 4.10 MiB)
@btime let
    frame = $(eg_df())
    # Visit the rows one at a time; wherever a is 1, overwrite b with c.
    for r in eachrow(frame)
        r.a == 1 || continue
        r.b = r.c
    end
end;
# masking
# 112.436 μs (25 allocations: 117.98 KiB)
# NOTE(review): the timing above was presumably measured while the last line used `.==`
# (a comparison) rather than `.=` (an assignment); re-run to get a number for the fix.
@btime let
    df = $(eg_df())
    # Compute the row mask once and reuse it for both the source and the destination.
    mask = df.a .== 1
    # Broadcast-assign so column b is actually updated where a is 1. The previous `.==`
    # only compared the two selections and left df unchanged.
    df[mask, :b] .= df[mask, :c]
end;
# For indexed
# 6.678 ms (249279 allocations: 3.96 MiB)
@btime let
    tbl = $(eg_df())
    nrows = nrow(tbl)
    # Plain index loop over the rows: copy c into b on every row where a is 1.
    for i in 1:nrows
        tbl[i, :a] == 1 && (tbl[i, :b] = tbl[i, :c])
    end
end;
# For indexed inbounds
# 4.335 ms (139204 allocations: 2.28 MiB)
@btime let
    data = $(eg_df())
    # Same index loop as the plain indexed variant, but wrapped in @inbounds.
    @inbounds for k in 1:nrow(data)
        if data[k, :a] != 1
            continue
        end
        data[k, :b] = data[k, :c]
    end
    # Unlike the other loop benchmarks, this one evaluates to the data frame itself.
    data
end;
# Make a function
# 22.892 μs (0 allocations: 0 bytes)
"""
    set_b!(a, b, c)

Overwrite `b[i]` with `c[i]` at every index `i` where `a[i] == 1`.

`b` is modified in place; `a` and `c` are only read. Returns `nothing`.

# Throws
- `DimensionMismatch` if `a`, `b` and `c` do not share the same indices.
"""
function set_b!(a, b, c)
    # Iterating eachindex over all three arrays checks up front that they share the
    # same indices, which is what makes skipping the bounds checks below safe.
    @inbounds for ii in eachindex(a, b, c)
        if a[ii] == 1
            b[ii] = c[ii]
        end
    end
    return nothing
end
@btime let
    df = $(eg_df())
    # Pull the three columns out of the data frame, then hand them to set_b!.
    col_a, col_b, col_c = df.a, df.b, df.c
    set_b!(col_a, col_b, col_c)
end;
# byrow!
# 248.458 μs (24 allocations: 1.53 MiB)
@btime let
    df = $(eg_df())
    @byrow! df begin
        # Take c where a is 1 and keep b otherwise, matching the loop variants in this
        # file. The branches were previously swapped, which replaced b on the rows
        # where a was NOT 1.
        :b = :a == 1 ? :c : :b
    end
end;
# @transform
# 109.362 μs (26 allocations: 1.91 MiB)
@btime let
    df = $(eg_df())
    # ifelse.(cond, x, y) picks x where cond holds: take c where a is 1, keep b
    # elsewhere, matching the loop variants in this file. The last two arguments
    # were previously swapped.
    @transform(df, b = ifelse.(:a .== 1, :c, :b))
end;
# transform ternary
# 103.689 μs (97 allocations: 1.53 MiB)
@btime let
    df = $(eg_df())
    # Row-wise ternary: the new b is c where a is 1 and the old b otherwise, matching
    # the loop variants in this file. The branches were previously swapped.
    transform(df, [:a, :b, :c] => ByRow((a, b, c) -> a == 1 ? c : b) => :b)
end;
# transform ifelse
# 100.875 μs (83 allocations: 1.53 MiB)
@btime let
    df = $(eg_df())
    # Row-wise ifelse: the new b is c where a is 1 and the old b otherwise, matching
    # the loop variants in this file. The last two arguments were previously swapped.
    transform(df, [:a, :b, :c] => ByRow((a, b, c) -> ifelse(a == 1, c, b)) => :b)
end;
# Make a function
# 23 μs (0 allocations: 0 bytes)
# transform ifelse
# 101 μs (83 allocations: 1.53 MiB)
# transform ternary
# 104 μs (97 allocations: 1.53 MiB)
# @transform
# 109 μs (26 allocations: 1.91 MiB)
# masking
# 112 μs (25 allocations: 117.98 KiB)
# byrow!
# 248 μs (24 allocations: 1.53 MiB)
# For indexed inbounds
# 4335 μs (139204 allocations: 2.28 MiB)
# For indexed
# 6678 μs (249279 allocations: 3.96 MiB)
# for eachrow loop
# 7132 μs (239156 allocations: 4.10 MiB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment