Created
September 30, 2018 21:00
-
-
Save pdeffebach/931b3edc4a9a134f7e3a96bfde7068c8 to your computer and use it in GitHub Desktop.
DataFramesMeta tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DataFrames, DataFramesMeta, BenchmarkTools | |
"""
    scalarfun(g::GroupedDataFrame)

Benchmark target for the "scalar" case: runs `@transform` on `g`, defining a
new column `t` from the expression `:b[1]` (the first element of column `b`).

NOTE(review): with a grouped input this is presumably evaluated once per group
and spread over the group's rows -- confirm against the DataFramesMeta version
being benchmarked.
"""
scalarfun(g::GroupedDataFrame) = @transform(g, t = :b[1])
"""
    vectorfun(g::GroupedDataFrame)

Benchmark target for the "vector" case: runs `@transform` on `g`, defining a
new column `t` from the broadcast expression `:b .- 1.0` (column `b` shifted
down by one, element-wise).
"""
vectorfun(g::GroupedDataFrame) = @transform(g, t = :b .- 1.0)
# Benchmark inputs: two 10_000-row frames grouped on column :a.
# Many groups: `a` runs through 1:5000 twice, i.e. 5000 distinct key values.
g_many_groups = groupby(DataFrame(a = [1:5000; 1:5000], b = randn(10_000)), :a)
# Few groups: `a` is mod.(1:10_000, 10), i.e. 10 distinct key values.
g_few_groups = groupby(DataFrame(a = mod.(1:10_000, 10), b = randn(10_000)), :a)

# `$` interpolates the global into the benchmark expression so @btime times the
# call itself rather than the lookup of a non-constant global.
println("Results with many groups")
println("\t Scalar function:")
@btime scalarfun($g_many_groups);
println("\t Vector function")
@btime vectorfun($g_many_groups);

println("Results with few groups")
println("\t Scalar function:")
@btime scalarfun($g_few_groups);
println("\t Vector function:")
@btime vectorfun($g_few_groups);

println("Tests finished")
#= | |
] add DataFramesMeta/pdeffebach#grouped-transform-overhaul | |
New version | |
Results with many groups | |
Scalar function: | |
4.087 ms (74566 allocations: 3.43 MiB) | |
Vector function | |
7.476 ms (94566 allocations: 4.57 MiB) | |
Results with few groups | |
Scalar function: | |
151.623 μs (218 allocations: 635.28 KiB) | |
Vector function: | |
181.948 μs (278 allocations: 794.81 KiB) | |
Tests finished | |
=# | |
#= | |
] add DataFramesMeta#master | |
Old version | |
Results with many groups | |
Scalar function: | |
3.796 ms (84572 allocations: 3.66 MiB) | |
Vector function | |
4.055 ms (89572 allocations: 4.12 MiB) | |
Results with few groups | |
Scalar function: | |
146.958 μs (244 allocations: 636.19 KiB) | |
Vector function: | |
157.688 μs (254 allocations: 715.56 KiB) | |
Tests finished | |
=# |
Some Stata comparisons
* Stata counterpart of the Julia benchmark: time by-group `gen` for a scalar
* expression (b[1]) and a whole-column expression (b), once with many groups
* and once with few groups. Timers 1-4 hold the four measurements.
clear
* Build a 10000-observation dataset.
local N = 10000
set obs `N'
* Group keys: mod(_n, 5000) takes 5000 distinct values, mod(_n, 10) takes 10.
gen a_many_groups = mod(_n, 5000)
gen a_few_groups = mod(_n, 10)
gen b = rnormal()
* Sorting is done before each timer starts, so sort cost is not measured.
sort a_many_groups
* Timer 1: many groups, scalar expression (b[1] under the by: prefix).
timer on 1
by a_many_groups: gen t_many_groups_s = b[1]
timer off 1
* Timer 2: many groups, whole-column expression.
* NOTE(review): this copies b unchanged, whereas the Julia vectorfun computes
* b - 1.0 -- confirm the two are meant to be compared directly.
timer on 2
by a_many_groups: gen t_many_groups_v = b
timer off 2
sort a_few_groups
* Timer 3: few groups, scalar expression.
timer on 3
by a_few_groups: gen t_few_groups_s = b[1]
timer off 3
* The data were already sorted by a_few_groups above; this sort is repeated
* before the next timed step.
sort a_few_groups
* Timer 4: few groups, whole-column expression (same caveat as timer 2).
timer on 4
by a_few_groups: gen t_few_groups_v = b
timer off 4
* Report the four timings under the same headings the Julia script prints.
dis "Many groups"
dis "Scalar function"
timer list 1
dis "Vector function"
timer list 2
dis "Few groups"
dis "Scalar function"
timer list 3
dis "Vector function"
timer list 4
* Reset the timers so a later run does not accumulate onto these values.
timer clear 1
timer clear 2
timer clear 3
timer clear 4
***
Output (in seconds)
. timer list 1
1: 0.02 / 1 = 0.0170 // julia wins (17 vs 3 ms)
. dis "Vector function"
Vector function
. timer list 2
2: 0.01 / 1 = 0.0110 // julia wins (11 vs 4 ms)
.
. dis "Few groups"
Few groups
. dis "Scalar function"
Scalar function
. timer list 3
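3: 0.00 / 1 = 0.0010 // julia loses (NOTE(review): 0.0010 s is 1 ms, while the Julia scalar results above are ~0.15 ms -- confirm this conclusion)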
. dis "Vector function"
Vector function
. timer list 4
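4: 0.00 / 1 = 0.0010 // julia loses (NOTE(review): 0.0010 s is 1 ms, while the Julia vector results above are ~0.16-0.18 ms -- confirm this conclusion)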
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated with the new recursive strategy