public
Last active

Error message from using data.table on a very wide dataset

  • Download Gist
data.table.message.R
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
# Build a deliberately wide sample data set: `size` rows, three identifier
# columns, and one value column per monthly date (`dateSamples` of them).

# Drop any `dtt` left over from an earlier run so the script starts clean.
# Guarded with exists(): a bare rm(dtt) warns "object 'dtt' not found" in a
# fresh session, which muddies a script whose purpose is to show a warning.
if (exists("dtt", inherits = FALSE)) {
  rm(dtt)
}

set.seed(1)

# Every four-letter combination from the grid (26 * 6 * 20 * 5 = 15,600
# codes), pasted column-wise instead of row-by-row through apply().
LLLL <- do.call(
  paste0,
  unname(as.list(expand.grid(
    LETTERS, LETTERS[10:15], LETTERS[1:20], LETTERS[1:5],
    stringsAsFactors = FALSE
  )))
)

size <- 1000
dateSamples <- 1500

startDate <- as.Date("1980-01-01")

# Eight-letter names: the i-th name joins code i with code (size + i).
Name <- paste0(LLLL[seq_len(size)], LLLL[size + seq_len(size)])
# Codes are drawn without replacement, so they are unique.
Code <- sample(1e3:max(1e4 - 1, size + 1e3), length(Name))
CURRENCY <- sample(c("USD", "EUR", "YEN"), length(Name), replace = TRUE)

# One date per month; "month" is spelled out rather than partially matched.
Dates <- seq(startDate, length.out = dateSamples, by = "month")
Values <- sample(c(1:1e2, 1:5e2), size = size * dateSamples, replace = TRUE) / 1e2

# Calling the sample data frame `data` to keep consistency, but I don't like
# this practice.
# Note: data.frame() runs the date column names through make.names(), so
# "1980-01-01" becomes "X1980.01.01".
data <- data.frame(
  Name, Code, CURRENCY,
  matrix(
    Values,
    ncol = length(Dates),
    dimnames = list(NULL, as.character(Dates))
  )
)
 
library(data.table)
# Identifier ("non value") columns: these are carried through the reshape
# unchanged.
nvc <- c("Name", "Code", "CURRENCY")

# Every remaining column of `data` is one of the columns being transformed.
dateCols <- names(data)[!(names(data) %in% nvc)]

# data.table version of the wide sample frame.
dtt <- data.table(data)
 
# Use rbindlist() to combine the per-date subsets into one long table
 
#+------------------------------------+#
# WARNING MESSAGE #
#+------------------------------------+#
 
# First pass: build one (nvc..., Date, value) slice per date column and stack
# the slices into a single long table.
dtt2 <- rbindlist(lapply(dateCols, function(date_col) {
  # `:=` writes the `Date` column onto `dtt` itself, not onto a copy.
  dtt[, Date := date_col]
  keep <- c(nvc, "Date", date_col)
  slice <- dtt[, keep, with = FALSE]
  # Rename the per-date column to a common "value" so the slices line up.
  setnames(slice, keep, c(nvc, "Date", "value"))
}))
 
#+------------------------------------------------+#
# NO WARNING MESSAGE #
# Probably because `dtt` has already been modified #
#+------------------------------------------------+#
 
# Second pass: the same reshape run again, one slice per date column, stacked
# into a single long table.
dtt3 <- rbindlist(lapply(dateCols, function(date_col) {
  # `:=` writes the `Date` column onto `dtt` itself, not onto a copy.
  dtt[, Date := date_col]
  keep <- c(nvc, "Date", date_col)
  slice <- dtt[, keep, with = FALSE]
  # Rename the per-date column to a common "value" so the slices line up.
  setnames(slice, keep, c(nvc, "Date", "value"))
}))

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.