Skip to content

Instantly share code, notes, and snippets.

@thisisnic
Last active September 8, 2022 15:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thisisnic/14fb9c1001261f2cf249f9317cda6466 to your computer and use it in GitHub Desktop.
Save thisisnic/14fb9c1001261f2cf249f9317cda6466 to your computer and use it in GitHub Desktop.
lazy_query from dbplyr
# query details copied from https://github.com/voltrondata-labs/arrowbench/blob/main/R/tpch-queries.R
query_results <- lineitem_db %>%
select(l_shipdate, l_returnflag, l_linestatus, l_quantity,
l_extendedprice, l_discount, l_tax) %>%
# kludge, should be: filter(l_shipdate <= "1998-12-01" - interval x day) %>%
# where x is between 60 and 120, 90 is the only one that will validate.
filter(l_shipdate <= as.Date("1998-09-02")) %>%
select(l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax) %>%
group_by(l_returnflag, l_linestatus) %>%
summarise(
sum_qty = sum(l_quantity),
sum_base_price = sum(l_extendedprice),
sum_disc_price = sum(l_extendedprice * (1 - l_discount)),
sum_charge = sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),
avg_qty = mean(l_quantity),
avg_price = mean(l_extendedprice),
avg_disc = mean(l_discount),
count_order = n()
) %>%
ungroup() %>%
arrange(l_returnflag, l_linestatus)
# This is the output lazy_query
structure(
list(
x = structure(
list(
x = structure(
list(
x = structure(
list(
x = structure(
list(
x = structure("lineitem", class = c("ident",
"character")),
vars = c(
"l_orderkey",
"l_partkey",
"l_suppkey",
"l_linenumber",
"l_quantity",
"l_extendedprice",
"l_discount",
"l_tax",
"l_returnflag",
"l_linestatus",
"l_shipdate",
"l_commitdate",
"l_receiptdate",
"l_shipinstruct",
"l_shipmode",
"l_comment"
),
group_vars = character(0),
order_vars = NULL,
frame = NULL
),
class = c("lazy_base_remote_query",
"lazy_base_query", "lazy_query")
),
select = structure(
list(
name = c(
"l_shipdate",
"l_returnflag",
"l_linestatus",
"l_quantity",
"l_extendedprice",
"l_discount",
"l_tax"
),
expr = list(
l_shipdate,
l_returnflag,
l_linestatus,
l_quantity,
l_extendedprice,
l_discount,
l_tax
),
group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL,
NULL),
order_vars = list(NULL, NULL, NULL, NULL,
NULL, NULL, NULL),
frame = list(NULL, NULL, NULL,
NULL, NULL, NULL, NULL)
),
class = c("tbl_df", "tbl",
"data.frame"),
row.names = c(NA,-7L)
),
where = NULL,
group_by = NULL,
order_by = NULL,
distinct = FALSE,
limit = NULL,
select_operation = "mutate",
last_op = "select",
message_summarise = NULL,
group_vars = character(0),
order_vars = NULL,
frame = NULL
),
class = c("lazy_select_query",
"lazy_query")
),
select = structure(
list(
name = c(
"l_shipdate",
"l_returnflag",
"l_linestatus",
"l_quantity",
"l_extendedprice",
"l_discount",
"l_tax"
),
expr = list(
l_shipdate,
l_returnflag,
l_linestatus,
l_quantity,
l_extendedprice,
l_discount,
l_tax
),
group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
order_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL),
frame = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL)
),
class = c("tbl_df",
"tbl", "data.frame"),
row.names = c(NA,-7L)
),
where = structure(list(~ l_shipdate <= as.Date("1998-09-02")), names = ""),
group_by = NULL,
order_by = NULL,
distinct = FALSE,
limit = NULL,
select_operation = "mutate",
last_op = "filter",
message_summarise = NULL,
group_vars = character(0),
order_vars = NULL,
frame = NULL
),
class = c("lazy_select_query",
"lazy_query")
),
select = structure(
list(
name = c(
"l_returnflag",
"l_linestatus",
"l_quantity",
"l_extendedprice",
"l_discount",
"l_tax"
),
expr = list(
l_returnflag,
l_linestatus,
l_quantity,
l_extendedprice,
l_discount,
l_tax
),
group_vars = list(NULL,
NULL, NULL, NULL, NULL, NULL),
order_vars = list(NULL, NULL,
NULL, NULL, NULL, NULL),
frame = list(NULL, NULL, NULL, NULL,
NULL, NULL)
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA,-6L)
),
where = NULL,
group_by = NULL,
order_by = NULL,
distinct = FALSE,
limit = NULL,
select_operation = "mutate",
last_op = "select",
message_summarise = NULL,
group_vars = c("l_returnflag",
"l_linestatus"),
order_vars = NULL,
frame = NULL
),
class = c("lazy_select_query",
"lazy_query")
),
select = structure(
list(
name = c(
"l_returnflag",
"l_linestatus",
"sum_qty",
"sum_base_price",
"sum_disc_price",
"sum_charge",
"avg_qty",
"avg_price",
"avg_disc",
"count_order"
),
expr = list(
l_returnflag,
l_linestatus,
~ sum(l_quantity),
~ sum(l_extendedprice),
~ sum(l_extendedprice * (1 - l_discount)),
~ sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)),
~ mean(l_quantity),
~ mean(l_extendedprice),
~ mean(l_discount),
~ n()
),
group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL),
order_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL),
frame = list(NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL)
),
class = c("tbl_df",
"tbl", "data.frame"),
row.names = c(NA,-10L)
),
where = NULL,
group_by = list(l_returnflag, l_linestatus),
order_by = list(~ l_returnflag, ~ l_linestatus),
distinct = FALSE,
limit = NULL,
select_operation = "summarise",
last_op = "summarise",
message_summarise = "\033[38;5;232m`summarise()` has grouped output by \033[34m\"l_returnflag\"\033[38;5;232m. You can override using the `.groups` argument.\033[39m",
group_vars = character(0),
order_vars = list( ~ l_returnflag,
~ l_linestatus),
frame = NULL
),
class = c("lazy_select_query",
"lazy_query")
)
@paleolimbot
Copy link

paleolimbot commented Sep 8, 2022

Reprex :)

library(substrait, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql

con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
db_copy_to(con, "mtcars", mtcars)
#> [1] "mtcars"

query <- tbl(con, "mtcars") |> 
  select(mpg, disp)

str(query$lazy_query)
#> List of 13
#>  $ x                :List of 5
#>   ..$ x         : 'ident' chr "mtcars"
#>   ..$ vars      : chr [1:11] "mpg" "cyl" "disp" "hp" ...
#>   ..$ group_vars: chr(0) 
#>   ..$ order_vars: NULL
#>   ..$ frame     : NULL
#>   ..- attr(*, "class")= chr [1:3] "lazy_base_remote_query" "lazy_base_query" "lazy_query"
#>  $ select           : tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   ..$ name      : chr [1:2] "mpg" "disp"
#>   ..$ expr      :List of 2
#>   .. ..$ : symbol mpg
#>   .. ..$ : symbol disp
#>   ..$ group_vars:List of 2
#>   .. ..$ : NULL
#>   .. ..$ : NULL
#>   ..$ order_vars:List of 2
#>   .. ..$ : NULL
#>   .. ..$ : NULL
#>   ..$ frame     :List of 2
#>   .. ..$ : NULL
#>   .. ..$ : NULL
#>  $ where            : NULL
#>  $ group_by         : NULL
#>  $ order_by         : NULL
#>  $ distinct         : logi FALSE
#>  $ limit            : NULL
#>  $ select_operation : chr "mutate"
#>  $ last_op          : chr "select"
#>  $ message_summarise: NULL
#>  $ group_vars       : chr(0) 
#>  $ order_vars       : NULL
#>  $ frame            : NULL
#>  - attr(*, "class")= chr [1:2] "lazy_select_query" "lazy_query"

Created on 2022-09-08 by the reprex package (v2.0.1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment