Skip to content

Instantly share code, notes, and snippets.

@dhersz
Last active July 23, 2020 22:01
Show Gist options
  • Save dhersz/0c89845623d824614d572ef7c1cee349 to your computer and use it in GitHub Desktop.
Save dhersz/0c89845623d824614d572ef7c1cee349 to your computer and use it in GitHub Desktop.
library(data.table)
library(Hmisc)

# Fix the RNG seed so the simulated data set is reproducible.
set.seed(1)

# Simulated benchmark data: `pop` is used as the weight and `income` as the
# variable to be binned; `group` and `year` are the grouping keys.
n <- 100000
dt <- data.table(pop = runif(n), income = runif(n))

# First half of the rows is group "A", second half group "B" (assumes n is
# even). `:=` adds the columns by reference, avoiding the copy that
# `dt$col <- value` may make on a data.table.
dt[, group := rep(c("A", "B"), each = n / 2)]
dt[, year := sample(2000:2005, n, replace = TRUE)]

#' Add weighted income deciles to a data.table (data.table implementation)
#'
#' Within each `year` x `group` cell, computes the `pop`-weighted decile break
#' points of `income` with `Hmisc::wtd.quantile()` and stores the index of the
#' bin each row falls into (1-10) in a new column `decile`.
#'
#' @param dt A data.table with columns `income`, `pop`, `year` and `group`.
#'   It is modified by reference through `:=`.
#' @return The updated `dt` (data.table returns it invisibly from `:=`).
ola <- function(dt) {
  # Bin `value` into weighted deciles; returns integer bin codes.
  weighted_decile <- function(value, weight) {
    breaks <- Hmisc::wtd.quantile(
      x = value,
      weights = weight,
      probs = 0:10 / 10,
      # Passing the full vector of choices selected its first element, so
      # name that element explicitly.
      type = "quantile",
      normwt = FALSE,
      na.rm = TRUE
    )
    # include.lowest = TRUE keeps the group minimum inside the first bin;
    # labels = FALSE yields integer codes instead of a factor.
    cut(x = value, breaks = breaks, labels = FALSE, include.lowest = TRUE)
  }

  dt[, decile := weighted_decile(income, pop), by = .(year, group)]
}

#' Add weighted income deciles to a data frame (dplyr implementation)
#'
#' Within each `year` x `group` cell, computes the `pop`-weighted decile break
#' points of `income` with `Hmisc::wtd.quantile()` and stores the index of the
#' bin each row falls into (1-10) in a new column `decile`.
#'
#' dplyr functions are called with an explicit `dplyr::` prefix so the function
#' does not depend on dplyr (or the magrittr pipe) being attached by the caller.
#'
#' @param dt A data frame with columns `income`, `pop`, `year` and `group`.
#' @return The result of `dplyr::mutate()` on the grouped data: the input rows
#'   plus the `decile` column, still grouped by `year` and `group`.
oi <- function(dt) {
  # Bin `value` into weighted deciles; returns integer bin codes.
  weighted_decile <- function(value, weight) {
    breaks <- Hmisc::wtd.quantile(
      x = value,
      weights = weight,
      probs = 0:10 / 10,
      # Passing the full vector of choices selected its first element, so
      # name that element explicitly.
      type = "quantile",
      normwt = FALSE,
      na.rm = TRUE
    )
    # include.lowest = TRUE keeps the group minimum inside the first bin;
    # labels = FALSE yields integer codes instead of a factor.
    cut(value, breaks, labels = FALSE, include.lowest = TRUE)
  }

  grouped <- dplyr::group_by(dt, year, group)
  dplyr::mutate(grouped, decile = weighted_decile(income, pop))
}

Testando

> microbenchmark::microbenchmark(oi(dt), ola(dt))
Unit: milliseconds
    expr     min       lq     mean   median       uq      max neval
 ola(dt) 23.3579 24.09535 30.62981 24.75475 34.00270 338.7722   100
  oi(dt) 29.7886 30.61765 35.28524 31.99130 40.92295  48.7461   100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment