Last active
March 24, 2016 13:23
Comparing list of vector to count table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
G <- list(c("MD", "DT", "NN", "VB", "VBG", "TO", "POS"), c("DT", "NN", | |
"JJ", "RB"), c("RB", "TO", "PRP"), c("VBZ", "PRP", "VBG", "RB", | |
"TO", "NN"), c("NN", "NN")) | |
G <- rep(G, 1000) | |
library(qdapTools) | |
JORAN1 <- function(){ | |
lev <- sort(unique(unlist(G))) | |
do.call(rbind,lapply(G,function(x,lev){ table(factor(x,levels = lev, | |
ordered = TRUE))},lev = lev)) | |
} | |
JORAN2 <- function(){ | |
lev <- sort(unique(unlist(G))) | |
do.call(rbind,lapply(G,function(x,lev){ tabulate(factor(x,levels = lev, | |
ordered = TRUE),nbins = length(lev))},lev = lev)) | |
} | |
RINKER <- function() mtabulate(G) | |
TIM <- function(){ | |
Tags <- sort(unique(unlist(G))) | |
t(vapply(G,function(x){ | |
a <- Tags %in% x | |
a[a] <- tapply(x %in% Tags,x,sum) | |
a | |
}, FUN.VALUE = rep(0,length(Tags)))) | |
} | |
MDSUMMER <- function(){ | |
P <- lapply(G, function(x) table(sort(x))) | |
levs <- sort(unique(names(unlist(P)))) | |
do.call("rbind", lapply(G, function(x) table(factor(x, levs)))) | |
} | |
microbenchmark( | |
JORAN1(), | |
JORAN2(), | |
RINKER(), | |
TIM(), | |
MDSUMMER() | |
) | |
expr min lq mean median uq max neval | |
JORAN1() 648.04435 689.16756 714.9142 712.59122 732.4991 831.6623 100 | |
JORAN2() 86.83879 92.91911 98.7068 97.44690 101.6764 177.4228 100 | |
RINKER() 87.40797 94.07564 100.1154 98.39624 104.0887 177.3146 100 | |
TIM() 900.65847 964.23419 993.9475 988.89306 1023.0587 1137.6263 100 | |
MDSUMMER() 1395.95920 1487.45279 1527.3181 1527.92664 1571.0997 1685.3298 100 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment