Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
dplyrでUNIONする(DB接続編)
# install.packages("DBI")
# install.packages("RSQLite")
# install.packages("nycflights13")
library(dplyr)
# DB接続
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
# DBにローカルのデータフレームをコピー
dplyr::copy_to(con, attenu, "attenu", temporary = FALSE)
# テーブル読み込み(copy_toの返り値を使ってもよい)
tbl_attenu <- dplyr::tbl(con, "attenu")
# 3種類のデータを読み込んでみる
(tbl_e1 <- tbl_attenu %>%
filter(event==1))
(tbl_e8 <- tbl_attenu %>%
filter(event==8))
(tbl_e11 <- tbl_attenu %>%
filter(event==11))
# Error
rbind(tbl_e1, tbl_e8)
# Error
dplyr::bind_rows(tbl_e1, tbl_e8)
# Success
dplyr::union(tbl_e1, tbl_e8)
# UNION と UNION ALL の違い
dplyr::union_all(tbl_e1, tbl_e1)
dplyr::union(tbl_e1, tbl_e1)
# 注意:UNIONは最初の2つの引数しか受け付けない
dplyr::union(tbl_e1, tbl_e8, tbl_e11)
# 3つ以上をUNIONしたいときは、リストをReduceでUNIONする
tbl_list <- list(tbl_e1, tbl_e8, tbl_e11)
Reduce(dplyr::union, tbl_list)
# foreachを使うと便利
library(foreach)
events <- c(1, 8, 11)
tbl_list <- foreach::foreach(
x=events,
.combine = list,
.multicombine = T # TRUEにする必要がある
) %do% {
dplyr::tbl(con, "attenu") %>%
filter(event == x)
}
Reduce(dplyr::union, tbl_list)
# foreachを使うと便利
tbl_result <- foreach::foreach(
x=events,
.combine = dplyr::union,
.multicombine = F # 今度はFALSEにしないと、最初の2つしかUNIONされない
) %do% {
dplyr::tbl(con, "attenu") %>%
filter(event == x)
}
tbl_result
# show_queryで作られるSQLが見られる
tbl_result %>%
dplyr::show_query()
# 結果を一度computeしてテーブルに格納しておくと便利
tbl_result <- tbl_result %>%
dplyr::compute(name="attenu_union")
# .final を使って一気にUNION->格納もできる
tbl_result <- foreach::foreach(
x=events,
.combine = dplyr::union,
.multicombine = F,
.final = function(y){dplyr::compute(y, name="attenu_union2")}
) %do% {
dplyr::tbl(con, "attenu") %>%
filter(event == x)
}
tbl_result
> # install.packages("DBI")
> # install.packages("RSQLite")
> library(dplyr)
>
> # DB接続
> con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
>
> # DBにローカルのデータフレームをコピー
> dplyr::copy_to(con, attenu, "attenu", temporary = FALSE)
>
> # テーブル読み込み(copy_toの返り値を使ってもよい)
> tbl_attenu <- dplyr::tbl(con, "attenu")
>
> # 3種類のデータを読み込んでみる
> (tbl_e1 <- tbl_attenu %>%
+ filter(event==1))
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7 117 12 0.359
>
> (tbl_e8 <- tbl_attenu %>%
+ filter(event==8))
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 8 5.3 111 19 0.086
2 8 5.3 116 21 0.179
3 8 5.3 290 13 0.205
4 8 5.3 112 22 0.073
5 8 5.3 113 29 0.045
>
> (tbl_e11 <- tbl_attenu %>%
+ filter(event==11))
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 11 7.7 2714 45 0.11
2 11 7.7 2708 145 0.01
3 11 7.7 2715 300 0.01
>
> # Error
> rbind(tbl_e1, tbl_e8)
src ops
tbl_e1 List,2 List,4
tbl_e8 List,2 List,4
> # Error
> dplyr::bind_rows(tbl_e1, tbl_e8)
Error in bind_rows_(x, .id) :
Argument 1 must be a data frame or a named atomic vector, not a tbl_dbi/tbl_sql/tbl_lazy/tbl
>
> # Success
> dplyr::union(tbl_e1, tbl_e8)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
>
> # UNION と UNION ALL の違い
> dplyr::union_all(tbl_e1, tbl_e1)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7 117 12 0.359
2 1 7 117 12 0.359
> dplyr::union(tbl_e1, tbl_e1)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7 117 12 0.359
>
> # 注意:UNIONは最初の2つの引数しか受け付けない
> dplyr::union(tbl_e1, tbl_e8, tbl_e11)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
>
> # 3つ以上をUNIONしたいときは、リストをReduceでUNIONする
> tbl_list <- list(tbl_e1, tbl_e8, tbl_e11)
> Reduce(dplyr::union, tbl_list)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
7 11 7.7 2708 145 0.010
8 11 7.7 2714 45 0.110
9 11 7.7 2715 300 0.010
>
> # foreachを使うと便利
> library(foreach)
> events <- c(1, 8, 11)
> tbl_list <- foreach::foreach(
+ x=events,
+ .combine = list,
+ .multicombine = T # TRUEにする必要がある
+ ) %do% {
+ dplyr::tbl(con, "attenu") %>%
+ filter(event == x)
+ }
> Reduce(dplyr::union, tbl_list)
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
7 11 7.7 2708 145 0.010
8 11 7.7 2714 45 0.110
9 11 7.7 2715 300 0.010
>
> # foreachを使うと便利
> tbl_result <- foreach::foreach(
+ x=events,
+ .combine = dplyr::union,
+ .multicombine = F # 今度はFALSEにしないと、最初の2つしかUNIONされない
+ ) %do% {
+ dplyr::tbl(con, "attenu") %>%
+ filter(event == x)
+ }
> tbl_result
# Source: lazy query [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
7 11 7.7 2708 145 0.010
8 11 7.7 2714 45 0.110
9 11 7.7 2715 300 0.010
>
> # show_queryで作られるSQLが見られる
> tbl_result %>%
+ dplyr::show_query()
<SQL>
SELECT *
FROM (SELECT *
FROM `attenu`)
WHERE (`event` = 1.0)
UNION
SELECT *
FROM (SELECT *
FROM `attenu`)
WHERE (`event` = 8.0)
UNION
SELECT *
FROM (SELECT *
FROM `attenu`)
WHERE (`event` = 11.0)
>
> # 結果を一度computeしてテーブルに格納しておくと便利
> tbl_result <- tbl_result %>%
+ dplyr::compute(name="attenu_union")
>
> # .final を使って一気にUNION->格納もできる
> tbl_result <- foreach::foreach(
+ x=events,
+ .combine = dplyr::union,
+ .multicombine = F,
+ .final = function(y){dplyr::compute(y, name="attenu_union2")}
+ ) %do% {
+ dplyr::tbl(con, "attenu") %>%
+ filter(event == x)
+ }
> tbl_result
# Source: table<attenu_union2> [?? x 5]
# Database: sqlite 3.19.3 []
event mag station dist accel
<dbl> <dbl> <chr> <dbl> <dbl>
1 1 7.0 117 12 0.359
2 8 5.3 111 19 0.086
3 8 5.3 112 22 0.073
4 8 5.3 113 29 0.045
5 8 5.3 116 21 0.179
6 8 5.3 290 13 0.205
7 11 7.7 2708 145 0.010
8 11 7.7 2714 45 0.110
9 11 7.7 2715 300 0.010
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment