## mdsumner/slow-split-in-nest.R

## slow-split-in-nest.R
## Compare the cost of tidyr::nest() with the cost of the base split() call
## that it relies on internally, using a fortified subset of wrld_simpl.
library(tidyr)

data("wrld_simpl", package = "maptools")

## fortify the first 50 features of wrld_simpl and keep the result as a tibble
w <- tibble::as_tibble(
  ggplot2::fortify(wrld_simpl[seq_len(50), ])
)

## time nest() with hole, piece, group and id excluded from the selection
system.time(
  nest(w, -hole, -piece, -group, -id)
)
#    user  system elapsed
#   0.508   0.004   0.515

## Inside tidyr:::nest_impl the group index is used in a split() like the
## one below; the recorded timings show it accounts for nearly all of the
## time measured for nest() above.
system.time(
  split(x = w, f = w$group)
)
#    user  system elapsed
#   0.504   0.004   0.505

## A faster split(d, idx) for data frames would therefore speed nest() up.
## (Remember that split() sorts its result by "name", hence the final
## [unique(idx)] in nest_impl.)
	## Compare the cost of tidyr::nest() with the cost of the base split() call
	## that it relies on internally, using a fortified subset of wrld_simpl.
	library(tidyr)

	data("wrld_simpl", package = "maptools")

	## fortify the first 50 features of wrld_simpl and keep the result as a tibble
	w <- tibble::as_tibble(
	  ggplot2::fortify(wrld_simpl[seq_len(50), ])
	)

	## time nest() with hole, piece, group and id excluded from the selection
	system.time(
	  nest(w, -hole, -piece, -group, -id)
	)
	#    user  system elapsed
	#   0.508   0.004   0.515

	## Inside tidyr:::nest_impl the group index is used in a split() like the
	## one below; the recorded timings show it accounts for nearly all of the
	## time measured for nest() above.
	system.time(
	  split(x = w, f = w$group)
	)
	#    user  system elapsed
	#   0.504   0.004   0.505

	## A faster split(d, idx) for data frames would therefore speed nest() up.
	## (Remember that split() sorts its result by "name", hence the final
	## [unique(idx)] in nest_impl.)