Created
August 21, 2017 09:20
-
-
Save nacnudus/ef3b22b79164bbf9c0ebafbf558f22a0 to your computer and use it in GitHub Desktop.
How to do joins with data.table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
?`[.data.table` | |
DT <- data.table(x=rep(c("b","a","c"),each=3), y=c(1,3,6), v=1:9) | |
X <- data.table(x=c("c","b"), v=8:7, foo=c(4,2)) | |
colnames(DT) | |
# [1] "x" "y" "v" | |
colnames(X) | |
# [1] "x" "v" "foo" | |
DT[X, on="x"] # * right join (DT is left, X is right) | |
# * SELECT DT RIGHT JOIN X ON DT$x = x$x | |
# * X$v is renamed i.v to avoid conflict with DT$v. | |
# The i comes from the fact that X is given as the | |
# 'i' argument | |
X[DT, on="x"] # left join (DT is left, X is right) | |
# SELECT X RIGHT JOIN DT ON X$x = DT$x | |
DT[X, on="x", nomatch=0] # inner join | |
# SELECT DT INNER JOIN X ON DT$x = X$x | |
DT[!X, on="x"] # not join (DT is left) | |
# SELECT DT LEFT JOIN X ON DT$x != X$x | |
DT[X, on=.(y<=foo)] # NEW non-equi join (v1.9.8+) | |
# SELECT DT RIGHT JOIN X ON DT$y <= X$foo | |
# * i.x and i.v are new names for the X$x and X$v, which is given as the i | |
# argument, that conflict with column names in DT. You can prove this to | |
# yourself by observing that there is no row in DT where x=b and v=7. | |
# * X$foo isn't returned at all, which is annoying, but is consistent with the | |
# right-hand column being dropped in merges, where it becomes redundant in the | |
# output. You can select it manually (along with any other column) like so: | |
DT[X, .(x, y, v, i.x, i.v, i.foo), on=.(x, y>=foo)] # Select columns to return, including ones that would otherwise be dropped | |
DT[X, on="y<=foo"] # same as above | |
DT[X, on=c("y<=foo")] # same as above | |
DT[X, on=.(y>=foo)] # NEW non-equi join (v1.9.8+) | |
DT[X, on=.(y>=foo)] # NEW non-equi join (v1.9.8+) | |
DT[X, .(x,y,x.y,v,i.foo), on=.(x, y>=foo)] # Select x's join columns as well | |
DT[X, on=.(x, y<=foo)] # NEW non-equi join (v1.9.8+) | |
# SELECT DT RIGHT JOIN X ON DT$x = X$x AND DT$y <= x$foo | |
DT[X, on=.(x, v>=v), sum(y)*foo, by=.EACHI] # NEW non-equi join with by=.EACHI (v1.9.8+) | |
# Is this doing GROUP BY DT$x, DT$v? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment