Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Created March 8, 2018 17:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdwab/8bfaa6198040e4e6608b006e3f6a8d2f to your computer and use it in GitHub Desktop.
Save mrdwab/8bfaa6198040e4e6608b006e3f6a8d2f to your computer and use it in GitHub Desktop.
Faster versions of `cSplit_e`, `numMat`, and `charMat`.
cSplit_e_new <- function (indt, splitCols, sep = ",", mode = "binary", type = "numeric",
drop = FALSE, fixed = TRUE, fill = NULL) {
indt <- setDT(copy(indt))
if (is.numeric(splitCols)) splitCols <- names(indt)[splitCols]
if (length(sep) == 1) sep <- rep(sep, length(splitCols))
if (length(sep) != length(splitCols)) stop("Wrong number of sep supplied")
if (length(mode) == 1) mode <- rep(mode, length(splitCols))
if (length(mode) != length(mode)) stop("Wrong number of mode supplied")
if (any(!mode %in% c("binary", "value", "count"))) {
stop("Mode must be `binary`, `value`, or `count`")
}
if (length(type) == 1) type <- rep(type, length(splitCols))
if (length(type) != length(type)) stop("Wrong number of type supplied")
for (i in seq_along(splitCols)) {
a <- strsplit(as.character(indt[[splitCols[i]]]), sep[i], fixed = fixed)
TYPE <- type[i]
if (TYPE == "character") a <- trim_list(a)
temp <- switch(
TYPE,
numeric = numMat(a, mode = mode[i], fill = fill),
character = charMat(a, mode = mode[i], fill = fill),
stop("type must be numeric or character"))
temp <- as.data.table(temp)
NAMES <- if (TYPE == "character") {
names(temp)
} else {
sprintf("%s_%s", splitCols[i], seq_along(temp))
}
set(indt, j = NAMES, value = temp)
if (isTRUE(drop)) set(indt, j = splitCols[i], value = NULL)
}
indt[]
}
numMat <- function (listOfValues, mode = "binary", fill = NULL) {
len <- length(listOfValues)
vec <- as.integer(unlist(listOfValues, use.names = FALSE))
slvl <- seq(min(vec, na.rm = TRUE), max(vec, na.rm = TRUE))
i.idx <- rep(seq_len(len), lengths(listOfValues))
if (mode %in% c("binary", "value")) {
if (is.null(fill)) {
fill <- if (mode == "binary") 0L else NA_integer_
}
out <- matrix(as.integer(fill), nrow = len, ncol = length(slvl),
dimnames = list(NULL, slvl))
j.idx <- match(vec, slvl)
out[na.omit(cbind(i.idx, j.idx))] <- switch(
mode, binary = 1L, value = na.omit(vec),
stop("'mode' must be 'binary' or 'value'"))
} else if (mode == "count") {
out <- unclass(table(i.idx, factor(vec, slvl)))
if (!is.null(fill)) {
out <- replace(out, out == 0, fill)
}
}
out
}
charMat <- function (listOfValues, mode = "binary", fill = NULL) {
len <- length(listOfValues)
vec <- unlist(listOfValues, use.names = FALSE)
lvl <- sort(unique(vec))
i.idx <- rep(seq.int(len), lengths(listOfValues))
if (mode %in% c("binary", "value")) {
fill <- if (is.null(fill)) {
if (mode == "binary") 0L else NA_integer_
} else {
fill
}
out <- matrix(as.integer(fill), nrow = len, ncol = length(lvl),
dimnames = list(NULL, lvl))
j.idx <- match(vec, lvl)
out[na.omit(cbind(i.idx, j.idx))] <- switch(
mode, binary = 1L, value = na.omit(vec),
stop("'mode' must be 'binary' or 'value'"))
} else if (mode == "count") {
out <- unclass(table(i.idx, factor(vec, lvl)))
if (!is.null(fill)) {
out <- replace(out, out == 0, fill)
}
}
out
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment