Skip to content

Instantly share code, notes, and snippets.

@trinker
Last active March 24, 2016 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trinker/91802b8c4ba759034881 to your computer and use it in GitHub Desktop.
Save trinker/91802b8c4ba759034881 to your computer and use it in GitHub Desktop.
Comparing list of vector to count table
G <- list(c("MD", "DT", "NN", "VB", "VBG", "TO", "POS"), c("DT", "NN",
"JJ", "RB"), c("RB", "TO", "PRP"), c("VBZ", "PRP", "VBG", "RB",
"TO", "NN"), c("NN", "NN"))
G <- rep(G, 1000)
library(qdapTools)
JORAN1 <- function(){
lev <- sort(unique(unlist(G)))
do.call(rbind,lapply(G,function(x,lev){ table(factor(x,levels = lev,
ordered = TRUE))},lev = lev))
}
JORAN2 <- function(){
lev <- sort(unique(unlist(G)))
do.call(rbind,lapply(G,function(x,lev){ tabulate(factor(x,levels = lev,
ordered = TRUE),nbins = length(lev))},lev = lev))
}
RINKER <- function() mtabulate(G)
TIM <- function(){
Tags <- sort(unique(unlist(G)))
t(vapply(G,function(x){
a <- Tags %in% x
a[a] <- tapply(x %in% Tags,x,sum)
a
}, FUN.VALUE = rep(0,length(Tags))))
}
MDSUMMER <- function(){
P <- lapply(G, function(x) table(sort(x)))
levs <- sort(unique(names(unlist(P))))
do.call("rbind", lapply(G, function(x) table(factor(x, levs))))
}
microbenchmark(
JORAN1(),
JORAN2(),
RINKER(),
TIM(),
MDSUMMER()
)
expr min lq mean median uq max neval
JORAN1() 648.04435 689.16756 714.9142 712.59122 732.4991 831.6623 100
JORAN2() 86.83879 92.91911 98.7068 97.44690 101.6764 177.4228 100
RINKER() 87.40797 94.07564 100.1154 98.39624 104.0887 177.3146 100
TIM() 900.65847 964.23419 993.9475 988.89306 1023.0587 1137.6263 100
MDSUMMER() 1395.95920 1487.45279 1527.3181 1527.92664 1571.0997 1685.3298 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment