Skip to content

Instantly share code, notes, and snippets.

@reinholdsson
Last active August 29, 2015 14:03
Show Gist options
  • Save reinholdsson/399f023bdf3bf80eb87a to your computer and use it in GitHub Desktop.
Save reinholdsson/399f023bdf3bf80eb87a to your computer and use it in GitHub Desktop.
R: (faster) single value dcast function
require(data.table)
#' Fast single-value dcast using matrix indexing
#'
#' Reshapes a long `data.table` into wide form when every (x, y) pair occurs
#' at most once, so no aggregation is needed. Output layout:
#'
#'   <y>       | <x level 1> | <x level 2> | ...
#'   y level 1 |    value    |    value    | ...
#'   y level 2 |    value    |    value    | ...
#'
#' Rows and columns follow factor-level order (sorted order for non-factor
#' keys, existing level order for factor keys). Unused factor levels yield
#' rows/columns consisting entirely of `fill`. All values are written into a
#' single matrix before conversion, so the value columns share one type.
#'
#' @param data A `data.table` in long format.
#' @param x Name (string) of the column whose levels become output columns.
#' @param y Name (string) of the column whose levels become output rows.
#' @param value Name (string) of the column holding the cell values.
#' @param fill Value used for (x, y) combinations absent from `data`.
#' @return A `data.table` whose first column is named after `y` and keeps the
#'   class of the original `y` column, followed by one column per level of
#'   `x`. Zero-row input returns a zero-row table holding only the `y` column.
single_val_dcast <- function(data, x, y, value, fill = NA) {
  if (!is.data.table(data)) {
    stop("Input data must be a data.table.")
  }
  # Explicit `by` avoids materialising a de-duplicated copy just to count rows.
  if (anyDuplicated(data, by = c(x, y)) > 0L) {
    stop("All combinations of x and y must be unique!")
  }
  # Nothing to reshape: there are no levels to build rows or columns from.
  if (nrow(data) == 0L) {
    return(data[0L, y, with = FALSE])
  }

  y_raw <- data[[y]]
  x_fac <- as.factor(data[[x]])
  y_fac <- as.factor(y_raw)
  x_levels <- levels(x_fac)
  y_levels <- levels(y_fac)
  x_codes <- as.integer(x_fac)
  y_codes <- as.integer(y_fac)

  # NA codes cannot be used as matrix subscripts; fail with a clear message.
  if (anyNA(x_codes) || anyNA(y_codes)) {
    stop("x and y must not contain missing values.")
  }

  # Size by the number of levels (not the largest observed code) so that the
  # matrix always lines up with `x_levels` / `y_levels`, even when a factor
  # carries unused levels.
  mx <- matrix(fill, nrow = length(y_levels), ncol = length(x_levels))
  mx[cbind(y_codes, x_codes)] <- data[[value]]
  res <- as.data.table(mx)
  setnames(res, x_levels)

  # Restore the y keys in their original class.
  if (is.factor(y_raw)) {
    # Keep the original level order (and orderedness) instead of re-sorting.
    y_keys <- factor(y_levels, levels = y_levels, ordered = is.ordered(y_raw))
  } else {
    # Levels of a non-factor come from the data itself, so every level has at
    # least one source row; reusing that row's value preserves any class
    # (Date, POSIXct, logical, ...) without a per-class converter.
    y_keys <- y_raw[match(seq_along(y_levels), y_codes)]
  }
  y_dt <- data.table(y_keys)
  setnames(y_dt, y)
  cbind(y_dt, res)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment