library(data.table)
library(Hmisc)
# Fix the RNG seed so the simulated data set is reproducible.
set.seed(1)

# Create a data.table of n rows with random population weights (`pop`) and
# random incomes (`income`), both drawn from runif().
n <- 100000
dt <- data.table(pop = runif(n), income = runif(n))
# First half of the rows belongs to group "A", second half to group "B".
# `:=` adds the column by reference, the data.table idiom for new columns.
dt[, group := rep(c("A", "B"), each = n / 2)]
# Draw a year from 2000-2005 (with replacement) for every row.
dt[, year := sample(2000:2005, n, replace = TRUE)]
# Assign every row of `dt` to a population-weighted income decile, computed
# separately within each (year, group) cell. data.table implementation.
#
# Args:
#   dt: a data.table with columns `income`, `pop`, `year` and `group`.
#
# Returns:
#   `dt` itself with an integer `decile` column (bin index 1-10) added.
#   The column is created with `:=`, so the caller's table is modified by
#   reference; no copy is made.
ola <- function(dt) {
  dt[
    ,
    decile := cut(
      x = income,
      # Break points are the weighted 0%, 10%, ..., 100% quantiles of
      # `income`, using `pop` as weights. "quantile" is the first (default)
      # choice of `type`, i.e. what passing the full choice vector selected.
      breaks = Hmisc::wtd.quantile(
        x = income,
        weights = pop,
        probs = 0:10 / 10,
        type = "quantile",
        normwt = FALSE,
        na.rm = TRUE
      ),
      # Return integer bin codes instead of a factor.
      labels = FALSE,
      # Close the first interval on the left so a value equal to the lowest
      # break is binned instead of becoming NA.
      include.lowest = TRUE
    ),
    by = .(year, group)
  ]
}
# Assign every row of `dt` to a population-weighted income decile, computed
# separately within each (year, group) cell. dplyr implementation.
#
# dplyr is never attached by this script, so its verbs are called with an
# explicit `dplyr::` prefix (the same way Hmisc is referenced); the function
# therefore works without `library(dplyr)`.
#
# Args:
#   dt: a data frame (or data.table) with columns `income`, `pop`, `year`
#     and `group`.
#
# Returns:
#   A copy of `dt` with an integer `decile` column (bin index 1-10) added.
#   The result is still grouped by `year` and `group`; call
#   dplyr::ungroup() on it if the grouping is not wanted downstream.
oi <- function(dt) {
  by_cell <- dplyr::group_by(dt, year, group)
  dplyr::mutate(
    by_cell,
    decile = cut(
      income,
      # Break points are the weighted 0%, 10%, ..., 100% quantiles of
      # `income`, using `pop` as weights. "quantile" is the first (default)
      # choice of `type`, i.e. what passing the full choice vector selected.
      breaks = Hmisc::wtd.quantile(
        x = income,
        weights = pop,
        probs = 0:10 / 10,
        type = "quantile",
        normwt = FALSE,
        na.rm = TRUE
      ),
      # Return integer bin codes instead of a factor.
      labels = FALSE,
      # Close the first interval on the left so a value equal to the lowest
      # break is binned instead of becoming NA.
      include.lowest = TRUE
    )
  )
}
Testing
> microbenchmark::microbenchmark(oi(dt), ola(dt))
Unit: milliseconds
expr min lq mean median uq max neval
ola(dt) 23.3579 24.09535 30.62981 24.75475 34.00270 338.7722 100
oi(dt) 29.7886 30.61765 35.28524 31.99130 40.92295 48.7461 100