# Skip to content
#
# Embed URL
#
# HTTPS clone URL
#
# Subversion checkout URL
#
# You can clone with
# or
# .
# Download ZIP
library(microbenchmark)
#' Build an indicator matrix from a list of value vectors.
#'
#' The result has one row per element of `listOfValues` and one column per
#' distinct value found across all elements (columns are in sorted order and
#' named after the values). A cell is set to 1 when the row's vector contains
#' the column's value; every other cell keeps `fill`.
#'
#' @param listOfValues A list of atomic vectors (e.g. the output of
#'   `strsplit()`).
#' @param fill Value stored in cells that are not set to 1. Defaults to `NA`.
#' @return A matrix with `length(listOfValues)` rows and one named column per
#'   distinct value. An empty list yields a 0 x 0 matrix.
charBinaryMat <- function(listOfValues, fill = NA) {
  values <- unlist(listOfValues, use.names = FALSE)
  # Handle "no values at all" explicitly so nothing downstream has to sort or
  # name columns from NULL.
  lev <- if (is.null(values)) character(0) else sort(unique(values))
  m <- matrix(fill, nrow = length(listOfValues), ncol = length(lev))
  colnames(m) <- lev
  # seq_len() yields an empty sequence for zero rows; 1:nrow(m) would iterate
  # over c(1, 0) and index out of bounds.
  for (i in seq_len(nrow(m))) {
    # Select columns by *name*: a numeric or logical value must address the
    # column labelled with it, not act as a positional/logical subscript.
    m[i, as.character(listOfValues[[i]])] <- 1
  }
  m
}
#' Membership test with the arguments of `%in%` swapped.
#'
#' Takes the lookup table first so it can be used as the function in
#' `sapply()`/`vapply()` calls that iterate over a list of tables.
#'
#' @param vector The values to search in.
#' @param value The value(s) to look for.
#' @return A logical vector, one entry per element of `value`.
reverseIn <- function(vector, value) {
  match(value, vector, nomatch = 0L) > 0L
}
#' Build a logical membership matrix from space-separated category strings.
#'
#' Each element of `valueVector` is split on single spaces. The result has one
#' row per element and one column per distinct token, with columns ordered by
#' first appearance. A cell is `TRUE` when the row's tokens include the
#' column's token. For the empty-string token `""` the column instead marks
#' the elements of `valueVector` that are exactly `""`.
#'
#' @param valueVector A character vector of space-separated tokens.
#' @return A logical matrix with `length(valueVector)` rows and column names
#'   equal to the distinct tokens.
buildCategoryMatrix <- function(valueVector) {
  # Distinct tokens, in order of first appearance across the distinct inputs.
  allClasses <- unique(unlist(
    strsplit(unique(valueVector), " ", fixed = TRUE),
    use.names = FALSE
  ))
  splitValues <- strsplit(valueVector, " ", fixed = TRUE)

  # One logical column per token.
  columns <- lapply(allClasses, function(cls) {
    if (cls == "") {
      valueVector == ""
    } else {
      vapply(splitValues, reverseIn, logical(1), cls)
    }
  })

  # Start from an n x 0 matrix so the result is a matrix even with no tokens.
  resMatrix <- do.call(
    cbind,
    c(list(matrix(ncol = 0, nrow = length(valueVector))), columns)
  )
  colnames(resMatrix) <- allClasses
  resMatrix
}
#' Benchmark candidate: split on spaces, then delegate to `charBinaryMat()`.
#'
#' @param str A character vector of space-separated tokens.
#' @return The matrix returned by `charBinaryMat()` with `fill = 0`.
CBM <- function(str) {
  tokens <- strsplit(str, " ", fixed = TRUE)
  charBinaryMat(tokens, fill = 0)
}
#' Benchmark candidate: `buildCategoryMatrix()` converted to integers.
#'
#' @param str A character vector of space-separated tokens.
#' @return The membership matrix from `buildCategoryMatrix()` multiplied by
#'   `1L`, i.e. with `TRUE`/`FALSE` turned into 1/0.
BCM <- function(str) {
  membership <- buildCategoryMatrix(str)
  membership * 1L
}
#' Benchmark candidate: per-string `%in%` test against all distinct tokens.
#'
#' Splits every element of `str` on spaces and returns an integer 0/1 matrix
#' with one row per element and one column per distinct token (columns in
#' order of first appearance, named after the tokens).
#'
#' @param str A character vector of space-separated tokens.
#' @return An integer matrix of dimension
#'   `length(str)` x (number of distinct tokens).
Sapply <- function(str) {
  y <- unique(unlist(strsplit(str, " ")))
  # vapply() pins the per-string result length; the values come back with the
  # token index varying fastest (a plain vector when there is one token).
  hits <- vapply(
    str,
    function(x) y %in% unlist(strsplit(x, " ")),
    logical(length(y)),
    USE.NAMES = FALSE
  )
  # Build the matrix explicitly instead of transposing: t() on the simplified
  # result gives a 1 x n matrix when there is only one distinct token, which
  # then makes the column naming fail.
  matrix(
    as.integer(hits),
    nrow = length(str),
    ncol = length(y),
    byrow = TRUE,
    dimnames = list(NULL, y)
  )
}
#' Benchmark candidate: `vapply()` of `%in%` over the split strings.
#'
#' Splits every element of `x` on single spaces and returns a numeric 0/1
#' matrix with one row per element and one column per distinct token. Columns
#' are in sorted token order and are named after the tokens; row names are
#' taken from `names(x)` when present.
#'
#' @param x A character vector of space-separated tokens.
#' @return A numeric matrix of dimension
#'   `length(x)` x (number of distinct tokens).
had <- function(x) {
  lines <- strsplit(x, " ", fixed = TRUE)
  tokens <- unlist(lines, use.names = FALSE)
  # Handle "no tokens" explicitly so nothing has to sort NULL.
  tokens <- if (is.null(tokens)) character(0) else sort(unique(tokens))
  hits <- vapply(
    lines,
    function(line) tokens %in% line,
    logical(length(tokens))
  )
  # Build the matrix explicitly instead of transposing: with a single distinct
  # token vapply() returns a plain vector and t() would yield 1 x n, not n x 1.
  matrix(
    as.numeric(hits),
    nrow = length(lines),
    ncol = length(tokens),
    byrow = TRUE,
    dimnames = list(names(lines), tokens)
  )
}
# Reproducible benchmark input: 1000 strings, each made of between 1 and 10
# distinct letters from A-J joined by single spaces.
set.seed(1)
A <- sample(10, 1000, replace = TRUE)
str <- vapply(
  A,
  function(n_letters) paste(sample(LETTERS[1:10], n_letters), collapse = " "),
  character(1)
)
head(str)
# Time the four implementations on the same input, 20 evaluations each.
microbenchmark(CBM(str), BCM(str), Sapply(str), had(str), times = 20)
# Unit: milliseconds
# expr min lq median uq max neval
# CBM(str) 2.780421 2.901480 2.957426 3.091195 9.443014 20
# BCM(str) 42.221266 44.834562 45.091333 46.691666 49.844484 20
# Sapply(str) 18.727119 19.106317 19.271673 20.679960 31.284702 20
# had(str) 3.139086 3.300337 3.404033 3.452204 5.381122 20
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
# Something went wrong with that request. Please try again.