Last active
February 10, 2017 02:58
-
-
Save tomhopper/9a424c24c6fb60b267ae to your computer and use it in GitHub Desktop.
Merge two data.tables and eliminate duplicated rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)
# Background: see
# http://stackoverflow.com/questions/11792527/filtering-out-duplicated-non-unique-rows-in-data-table
# for a discussion of how to eliminate duplicate rows from a data.table.
#
# The problem: in older data.table releases (before v1.9.8), unique() compared
# rows on the key columns only whenever a key was set, so rows that differed
# only in non-key columns were silently dropped. The examples in this script
# avoid that by making sure no key is in effect when unique() is called.
# Since v1.9.8, unique() compares all columns by default, so these workarounds
# are harmless but no longer strictly required.
# Example 1: a single column ----
# Build two one-column data.tables. Each holds 15 distinct letters (sampling
# without replacement), so duplicates can only arise across the two tables.
# The unnamed column receives data.table's default name, V1.
temp1 <- data.table(sample(letters, size = 15, replace = FALSE))
temp2 <- data.table(sample(letters, size = 15, replace = FALSE))
# Stack the two tables row-wise; a letter present in both now appears twice.
temp3 <- rbind(temp1, temp2)
# Listing the columns explicitly is intended to make unique() deduplicate on
# the listed column(s) instead of on a key. (No key is set anywhere above, so
# this is a defensive pattern rather than a necessity here.)
temp3 <- unique(temp3[, list(V1)])
# Example 2: two (or more) columns ----
# Build two two-column data.tables of 50 rows each. Sampling is done with
# replacement, so duplicated (V1, V2) rows can occur within a table as well as
# across the two tables. Columns get the default names V1 and V2.
temp1 <- data.table(
  sample(letters, size = 50, replace = TRUE),
  sample(letters, size = 50, replace = TRUE)
)
temp2 <- data.table(
  sample(letters, size = 50, replace = TRUE),
  sample(letters, size = 50, replace = TRUE)
)
# Stack the tables, then keep one copy of each distinct (V1, V2) combination.
# As in the single-column case, the columns are listed explicitly so that
# rows are compared on both V1 and V2 rather than on a key.
temp3 <- rbind(temp1, temp2)
temp3 <- unique(temp3[, list(V1, V2)])
# Example 3: clear the key, then deduplicate ----
# Alternative to listing the columns: stack the tables, remove any key, and
# call unique() on the whole table. Reuses temp1 and temp2 from the previous
# example.
temp3 <- rbind(temp1, temp2)
# setkey() modifies temp3 by reference (returning it invisibly), so the result
# does not need to be re-assigned. Passing NULL removes the key.
setkey(temp3, NULL)
# With no key in effect, rows are compared on all columns.
temp3 <- unique(temp3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment