Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdwab/3865d516fcc575d7099c to your computer and use it in GitHub Desktop.
Save mrdwab/3865d516fcc575d7099c to your computer and use it in GitHub Desktop.
Even faster version of cSplit....
#' Split delimited cells of one or more columns into new columns or rows
#'
#' Each column named in `splitCols` is split on its separator with stringi.
#' In `"wide"` form every piece becomes its own column, named
#' `<column>_<index>`. In `"long"` form every input row is repeated once per
#' piece position (the maximum piece count across all split columns) and the
#' pieces are stacked into a single `<column>_new` column per split column.
#'
#' @param indt A `data.frame` or `data.table`. It is converted or copied
#'   first, so the caller's object is not modified by reference.
#' @param splitCols Names, or numeric positions, of the columns to split.
#' @param sep Separator(s): a single value recycled over `splitCols`, or one
#'   value per split column.
#' @param direction Either `"wide"` or `"long"`.
#' @param fixed If `TRUE`, `sep` is matched literally; otherwise it is
#'   treated as a regular expression.
#' @param drop If `TRUE`, the original `splitCols` are removed from the
#'   result; if `FALSE`, they are kept alongside the new columns.
#' @param stripWhite If `TRUE`, leading and trailing whitespace is trimmed
#'   from every piece.
#' @param type.convert If `TRUE`, `utils::type.convert()` is applied to each
#'   newly created column.
#' @return A `data.table` holding the original columns (minus `splitCols`
#'   when `drop = TRUE`) plus the newly created columns.
cSplit <- function(indt, splitCols, sep = ",", direction = "wide", fixed = TRUE,
                   drop = TRUE, stripWhite = TRUE, type.convert = TRUE) {
  # library() fails loudly on a missing package; require() would only return
  # FALSE and let the function die later with an unrelated error.
  library(stringi)
  library(data.table)
  library(splitstackshape)

  # Reject unknown directions up front instead of silently returning NULL.
  direction <- match.arg(direction, c("wide", "long"))

  # Trim whitespace while keeping the matrix shape of the split result.
  Trim <- function(invec) {
    `dim<-`(stri_trim_both(invec), dim(invec))
  }

  # Right-pad a matrix with NA columns until it has `colsneeded` columns.
  padNAcols <- function(inmat, colsneeded) {
    colsNeeded <- colsneeded - ncol(inmat)
    if (colsNeeded == 0) {
      inmat
    } else {
      cbind(inmat, matrix(NA, nrow = nrow(inmat), ncol = colsNeeded))
    }
  }

  if (!is.data.table(indt)) {
    indt <- as.data.table(indt)
  } else {
    indt <- copy(indt)
  }
  if (is.numeric(splitCols)) {
    splitCols <- splitstackshape:::Names(indt, splitCols)
  }
  if (length(sep) == 1) {
    sep <- rep(sep, length(splitCols))
  }
  if (length(sep) != length(splitCols)) {
    stop("Verify you have entered the correct number of sep")
  }

  # Honour `fixed`: literal matching when TRUE, regular expression otherwise.
  splitter <- if (isTRUE(fixed)) stri_split_fixed else stri_split_regex

  # Split the i-th requested column into a character matrix of pieces.
  splitToMatrix <- function(i) {
    splitter(indt[[splitCols[i]]], sep[i], simplify = TRUE, omit_empty = TRUE)
  }

  # Convert every column, referring to the function by namespace because the
  # `type.convert` argument shadows its name inside this function.
  convertCols <- function(dt) {
    as.data.table(lapply(dt, utils::type.convert))
  }

  out <- switch(
    direction,
    wide = {
      pieces <- lapply(seq_along(splitCols), function(i) {
        mat <- splitToMatrix(i)
        if (isTRUE(stripWhite)) mat <- Trim(mat)
        wideDT <- as.data.table(mat)
        setnames(wideDT, paste(splitCols[i], splitstackshape:::.pad(
          sequence(ncol(wideDT))), sep = "_"))
        if (isTRUE(type.convert)) wideDT <- convertCols(wideDT)
        wideDT
      })
      cbind(indt, do.call(cbind, pieces))
    },
    long = {
      mats <- lapply(seq_along(splitCols), splitToMatrix)
      # All split columns must share one width so their stacked values line
      # up row-for-row with the expanded input.
      Ncols <- max(vapply(mats, ncol, 1L))
      stacked <- lapply(mats, function(mat) {
        # Transposing before flattening keeps each input row's pieces
        # adjacent in the resulting vector.
        vals <- c(t(padNAcols(mat, Ncols)))
        if (isTRUE(stripWhite)) vals <- stri_trim_both(vals)
        vals
      })
      longDT <- as.data.table(stacked)
      setnames(longDT, paste0(splitCols, "_new"))
      if (isTRUE(type.convert)) longDT <- convertCols(longDT)
      cbind(expandRows(indt, Ncols, count.is.col = FALSE), longDT)
    }
  )

  # Drop the source columns only on request, but always return the table.
  if (isTRUE(drop)) out[, (splitCols) := NULL]
  out[]
}
@mrdwab
Copy link
Author

mrdwab commented Oct 24, 2014

To do:

[X] Add the direction = "long" setting.
[ ] Remind self of what the makeEqual setting did. *At first it seemed irrelevant (is there any need to pad empty columns?), but the padding is needed for direction = "long" when splitting multiple columns at once.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment