Created
May 27, 2010 18:53
-
-
Save CerebralMastication/416192 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#first let's set up a big table and a little table | |
bigTableRows <- 1e6 | |
numbers <- 0:500 | |
dim1 <- sample(numbers, bigTableRows, replace=T) | |
dim2 <- sample(numbers, bigTableRows, replace=T) | |
fact1 <- rnorm(bigTableRows) | |
bigTable <- data.frame(dim1, dim2, fact1, stringsAsFactors = F) | |
littleTable <- data.frame(dim1 =numbers, fact3 = rnorm(length(numbers)), stringsAsFactors = F ) | |
#now let's test merge() | |
system.time(mergedData <- merge(bigTable, littleTable)) | |
# data.table time! | |
# data.table requires keys to be integer | |
# so don't expect this to work with chars | |
require(data.table) | |
dtBigTable <- data.table(bigTable, key='dim1') | |
dtLittleTable <- data.table(littleTable, key='dim1') | |
system.time(dtMergedData <- merge(dtBigTable, dtLittleTable)[, 'dim1']) | |
# here comes sqldf() | |
require(sqldf) | |
#build the index | |
#do the merge | |
system.time(sqldfMergedData <-sqldf(c("create index bigIndex on bigTable(dim1)", | |
"create index littleIndex on littleTable(dim1)", | |
"select * from main.bigTable natural join main.littleTable"))) | |
# I thought I could do the index and merge in two different steps. The examples on the sqldf() project | |
# page certainly look that way: http://code.google.com/p/sqldf/ | |
# it seems the main database get's erased as soon as sqldf exits so all the steps have to be strung together | |
# so it's not really a good compare with data.table since dt creates the index in the conversion | |
# from data frame to data.table |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment