dplyrでUNIONする(DB接続編)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("DBI") | |
# install.packages("RSQLite") | |
# install.packages("nycflights13") | |
library(dplyr) | |
# DB接続 | |
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:") | |
# DBにローカルのデータフレームをコピー | |
dplyr::copy_to(con, attenu, "attenu", temporary = FALSE) | |
# テーブル読み込み(copy_toの返り値を使ってもよい) | |
tbl_attenu <- dplyr::tbl(con, "attenu") | |
# 3種類のデータを読み込んでみる | |
(tbl_e1 <- tbl_attenu %>% | |
filter(event==1)) | |
(tbl_e8 <- tbl_attenu %>% | |
filter(event==8)) | |
(tbl_e11 <- tbl_attenu %>% | |
filter(event==11)) | |
# Error | |
rbind(tbl_e1, tbl_e8) | |
# Error | |
dplyr::bind_rows(tbl_e1, tbl_e8) | |
# Success | |
dplyr::union(tbl_e1, tbl_e8) | |
# UNION と UNION ALL の違い | |
dplyr::union_all(tbl_e1, tbl_e1) | |
dplyr::union(tbl_e1, tbl_e1) | |
# 注意:UNIONは最初の2つの引数しか受け付けない | |
dplyr::union(tbl_e1, tbl_e8, tbl_e11) | |
# 3つ以上をUNIONしたいときは、リストをReduceでUNIONする | |
tbl_list <- list(tbl_e1, tbl_e8, tbl_e11) | |
Reduce(dplyr::union, tbl_list) | |
# foreachを使うと便利 | |
library(foreach) | |
events <- c(1, 8, 11) | |
tbl_list <- foreach::foreach( | |
x=events, | |
.combine = list, | |
.multicombine = T # TRUEにする必要がある | |
) %do% { | |
dplyr::tbl(con, "attenu") %>% | |
filter(event == x) | |
} | |
Reduce(dplyr::union, tbl_list) | |
# foreachを使うと便利 | |
tbl_result <- foreach::foreach( | |
x=events, | |
.combine = dplyr::union, | |
.multicombine = F # 今度はFALSEにしないと、最初の2つしかUNIONされない | |
) %do% { | |
dplyr::tbl(con, "attenu") %>% | |
filter(event == x) | |
} | |
tbl_result | |
# show_queryで作られるSQLが見られる | |
tbl_result %>% | |
dplyr::show_query() | |
# 結果を一度computeしてテーブルに格納しておくと便利 | |
tbl_result <- tbl_result %>% | |
dplyr::compute(name="attenu_union") | |
# .final を使って一気にUNION->格納もできる | |
tbl_result <- foreach::foreach( | |
x=events, | |
.combine = dplyr::union, | |
.multicombine = F, | |
.final = function(y){dplyr::compute(y, name="attenu_union2")} | |
) %do% { | |
dplyr::tbl(con, "attenu") %>% | |
filter(event == x) | |
} | |
tbl_result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> # install.packages("DBI") | |
> # install.packages("RSQLite") | |
> library(dplyr) | |
> | |
> # DB接続 | |
> con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:") | |
> | |
> # DBにローカルのデータフレームをコピー | |
> dplyr::copy_to(con, attenu, "attenu", temporary = FALSE) | |
> | |
> # テーブル読み込み(copy_toの返り値を使ってもよい) | |
> tbl_attenu <- dplyr::tbl(con, "attenu") | |
> | |
> # 3種類のデータを読み込んでみる | |
> (tbl_e1 <- tbl_attenu %>% | |
+ filter(event==1)) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7 117 12 0.359 | |
> | |
> (tbl_e8 <- tbl_attenu %>% | |
+ filter(event==8)) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 8 5.3 111 19 0.086 | |
2 8 5.3 116 21 0.179 | |
3 8 5.3 290 13 0.205 | |
4 8 5.3 112 22 0.073 | |
5 8 5.3 113 29 0.045 | |
> | |
> (tbl_e11 <- tbl_attenu %>% | |
+ filter(event==11)) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 11 7.7 2714 45 0.11 | |
2 11 7.7 2708 145 0.01 | |
3 11 7.7 2715 300 0.01 | |
> | |
> # Error | |
> rbind(tbl_e1, tbl_e8) | |
src ops | |
tbl_e1 List,2 List,4 | |
tbl_e8 List,2 List,4 | |
> # Error | |
> dplyr::bind_rows(tbl_e1, tbl_e8) | |
Error in bind_rows_(x, .id) : | |
Argument 1 must be a data frame or a named atomic vector, not a tbl_dbi/tbl_sql/tbl_lazy/tbl | |
> | |
> # Success | |
> dplyr::union(tbl_e1, tbl_e8) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
> | |
> # UNION と UNION ALL の違い | |
> dplyr::union_all(tbl_e1, tbl_e1) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7 117 12 0.359 | |
2 1 7 117 12 0.359 | |
> dplyr::union(tbl_e1, tbl_e1) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7 117 12 0.359 | |
> | |
> # 注意:UNIONは最初の2つの引数しか受け付けない | |
> dplyr::union(tbl_e1, tbl_e8, tbl_e11) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
> | |
> # 3つ以上をUNIONしたいときは、リストをReduceでUNIONする | |
> tbl_list <- list(tbl_e1, tbl_e8, tbl_e11) | |
> Reduce(dplyr::union, tbl_list) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
7 11 7.7 2708 145 0.010 | |
8 11 7.7 2714 45 0.110 | |
9 11 7.7 2715 300 0.010 | |
> | |
> # foreachを使うと便利 | |
> library(foreach) | |
> events <- c(1, 8, 11) | |
> tbl_list <- foreach::foreach( | |
+ x=events, | |
+ .combine = list, | |
+ .multicombine = T # TRUEにする必要がある | |
+ ) %do% { | |
+ dplyr::tbl(con, "attenu") %>% | |
+ filter(event == x) | |
+ } | |
> Reduce(dplyr::union, tbl_list) | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
7 11 7.7 2708 145 0.010 | |
8 11 7.7 2714 45 0.110 | |
9 11 7.7 2715 300 0.010 | |
> | |
> # foreachを使うと便利 | |
> tbl_result <- foreach::foreach( | |
+ x=events, | |
+ .combine = dplyr::union, | |
+ .multicombine = F # 今度はFALSEにしないと、最初の2つしかUNIONされない | |
+ ) %do% { | |
+ dplyr::tbl(con, "attenu") %>% | |
+ filter(event == x) | |
+ } | |
> tbl_result | |
# Source: lazy query [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
7 11 7.7 2708 145 0.010 | |
8 11 7.7 2714 45 0.110 | |
9 11 7.7 2715 300 0.010 | |
> | |
> # show_queryで作られるSQLが見られる | |
> tbl_result %>% | |
+ dplyr::show_query() | |
<SQL> | |
SELECT * | |
FROM (SELECT * | |
FROM `attenu`) | |
WHERE (`event` = 1.0) | |
UNION | |
SELECT * | |
FROM (SELECT * | |
FROM `attenu`) | |
WHERE (`event` = 8.0) | |
UNION | |
SELECT * | |
FROM (SELECT * | |
FROM `attenu`) | |
WHERE (`event` = 11.0) | |
> | |
> # 結果を一度computeしてテーブルに格納しておくと便利 | |
> tbl_result <- tbl_result %>% | |
+ dplyr::compute(name="attenu_union") | |
> | |
> # .final を使って一気にUNION->格納もできる | |
> tbl_result <- foreach::foreach( | |
+ x=events, | |
+ .combine = dplyr::union, | |
+ .multicombine = F, | |
+ .final = function(y){dplyr::compute(y, name="attenu_union2")} | |
+ ) %do% { | |
+ dplyr::tbl(con, "attenu") %>% | |
+ filter(event == x) | |
+ } | |
> tbl_result | |
# Source: table<attenu_union2> [?? x 5] | |
# Database: sqlite 3.19.3 [] | |
event mag station dist accel | |
<dbl> <dbl> <chr> <dbl> <dbl> | |
1 1 7.0 117 12 0.359 | |
2 8 5.3 111 19 0.086 | |
3 8 5.3 112 22 0.073 | |
4 8 5.3 113 29 0.045 | |
5 8 5.3 116 21 0.179 | |
6 8 5.3 290 13 0.205 | |
7 11 7.7 2708 145 0.010 | |
8 11 7.7 2714 45 0.110 | |
9 11 7.7 2715 300 0.010 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment