Skip to content

Instantly share code, notes, and snippets.

@nischalshrestha
Last active July 10, 2023 23:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nischalshrestha/672125b1327770ab7061bbf6918b4a62 to your computer and use it in GitHub Desktop.
Save nischalshrestha/672125b1327770ab7061bbf6918b4a62 to your computer and use it in GitHub Desktop.
Meta explorations in R
library(tidyverse)
string_code <-
"
diamonds |>
select(carat, cut, color, clarity, price) |>
group_by(color) |>
summarise(n = n(), price = mean(price))
"
# split up a base pipe chain using simple regex, and return list of exprs
stringr::str_split(string_code, "\\|>") %>%
purrr::map(~ trimws(.x)) %>%
purrr::flatten_chr() %>%
purrr::map(~ rlang::parse_expr(.x))
#> [[1]]
#> diamonds
#>
#> [[2]]
#> select(carat, cut, color, clarity, price)
#>
#> [[3]]
#> group_by(color)
#>
#> [[4]]
#> summarise(n = n(), price = mean(price))
# LIMITATION: if student has |> in a string like this:
string_code <-
'
"This is a pipe: |>" |>
cat()
'
# experimental way to split base |> calls
library(tidyverse)
# Recurse on a base pipe code and return a list of intermediate expressions.
#
# @param expr The parent expression
# @param top_expr The `character` of the first or top expression symbol/function
# @param expr_list The `[]` that maintains the intermediate expressions
# by default empty `[]`
#
# @return The `[expression]`
recurse_base_pipe <- function(expr, top_expr, expr_list = list()) {
if (length(expr) == 1) return(append(expr, expr_list))
# grab the verb
verb_expr <- expr[[1]]
# base case if the verb is the first symbol/function call at the top
if (identical(verb_expr, top_expr)) {
return(append(expr, expr_list))
}
# extract verb first argument expr (we will recurse on this)
first_arg_expr <- expr[[2]]
# extract arguments
args <- rlang::call_args(expr)
# if the first argument is not a call return
if (!length(args)) {
return(expr_list)
}
# if there are more than 1 arg, take args from 2nd and so on
if (length(args) > 1) {
args <- args[2:length(args)]
verb_call_without_first_arg <- rlang::call2(verb_expr, !!!args)
} else {
# otherwise, we just make a no argument call with the verb
verb_call_without_first_arg <- rlang::call2(verb_expr)
}
# set the new expr_list list
expr_list <- append(verb_call_without_first_arg, expr_list)
# recurse on the first argument expression
recurse_base_pipe(first_arg_expr, top_expr, expr_list)
}
# This splits a base pipe code into a list of its intermediate expressions.
#
# It relies on using `getParseData()` to determine what the first expression is
# at the top/beginning of the chain. For e.g., for `mtcars |> head()`, we
# determine `mtcars` to be the stopping point.
#
# Then it uses `recurse_base_pipe()` on the quoted version of the code
# to recursively figure out the intermediate expressions.
#
# @param string_code A `character()` of the code
#
# @return The `[expression]`
split_base_pipe <- function(string_code) {
parse_data <- getParseData(parse(text = string_code))
# figure out what the starting expression of the chain is
top_expr <- parse_data[
parse_data$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"), 'text'
][[1]]
top_expr <- rlang::sym(top_expr)
recurse_base_pipe(
rlang::parse_expr(string_code),
top_expr
)
}
# examples
# starting at a symbol
expr1 <-
"mtcars |>
group_by() |>
summarise(mean_mpg = mean(mpg))"
split_base_pipe(expr1)
#> [[1]]
#> mtcars
#>
#> [[2]]
#> group_by()
#>
#> [[3]]
#> summarise(mean_mpg = mean(mpg))
# starting with a function call
expr2 <-
"data.frame(
fruit = c('apple', 'apple', 'orange', 'orange'),
cost = c(1.79, 2.79, 0.99, 1.50)
) |>
group_by(fruit) |>
summarise(mean_cost = mean(cost))"
split_base_pipe(expr2)
#> [[1]]
#> data.frame(fruit = c("apple", "apple", "orange", "orange"), cost = c(1.79,
#> 2.79, 0.99, 1.5))
#>
#> [[2]]
#> group_by(fruit)
#>
#> [[3]]
#> summarise(mean_cost = mean(cost))
# base pipes in args
expr3 <-
"foo('here is a string' |> sum()) |>
bar()"
split_base_pipe(expr3)
#> [[1]]
#> foo(sum("here is a string"))
#>
#> [[2]]
#> bar()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment