Skip to content

Instantly share code, notes, and snippets.

@dgrtwo
Last active September 16, 2015 09:10
Show Gist options
  • Save dgrtwo/3f4b06873cc3360ae33b to your computer and use it in GitHub Desktop.
Save dgrtwo/3f4b06873cc3360ae33b to your computer and use it in GitHub Desktop.
Some thoughts on a parsetidy function that turns expressions into tidy data frames, and further processes them (for use in reprex package)
parsetidy <- function(x, ...) UseMethod("parsetidy")
parsetidy.default <- function(x, ...) {
# if it's not a call or name (e.g. it's numeric),
# don't need it
data.frame()
}
parsetidy.expression <- function(x, ...) {
names(x) <- seq_along(x)
plyr::ldply(x, parsetidy, .id = "statement", ...)
}
parsetidy.call <- function(x, blacklist = c(), ...) {
if (as.character(x[[1]]) %in% blacklist) {
return(data.frame())
}
ret <- plyr::ldply(x, parsetidy, blacklist = blacklist, ...)
ret$is_function[1] <- TRUE
ret
}
parsetidy.name <- function(x, env = parent.frame(), ...) {
ch <- as.character(x)
envname <- tryCatch(environmentName(pryr::where(ch, env)),
error = function(e) NA)
data.frame(name = as.character(x),
is_function = FALSE,
environment = envname,
assignment = FALSE,
stringsAsFactors = FALSE)
}
`parsetidy.<-` <- function(x, ...) {
ret <- parsetidy.call(x)[-1, ]
if (inherits(x[[2]], "name")) {
ret$assignment[1] <- TRUE
}
ret
}
`parsetidy.{` <- function(x, ...) {
ret <- parsetidy.expression(x[-1])
ret$statement <- NULL
ret
}
define_vars <- function(...) {
vs <- lazyeval::lazy_dots(...)
varnames <- unname(sapply(vs, function(v) as.character(v$expr)))
}
define_vars_ <- function(varnames) {
dputs <- sapply(varnames, function(v) {
txt <- eval(substitute(capture.output(dput(x)), list(x = parse(text = v)[[1]])))
paste(txt, collapse = "\n")
})
paste(varnames, dputs, sep = " <- ")
}
dplyr_NSE_functions <- function() {
funcs <- ls("package:dplyr")
ret <- funcs[grepl(".*_$", funcs)]
gsub("_$", "", ret)
}
built_in <- c("base", "package:stats", "package:datasets",
"package:graphics", "package:grDevices",
"package:utils")
library(dplyr)
dplyr_NSE <- dplyr_NSE_functions()
vars_used <- function(expr) {
td <- parsetidy(expr, blacklist = c("aes", "~", dplyr_NSE))
# remove cases where a variable is assigned within code
processed <- td %>%
filter(!(environment %in% built_in)) %>%
group_by(name) %>%
filter(!assignment[1]) %>%
ungroup()
packages <- processed %>%
filter(grepl("^package:", environment)) %>%
.$environment %>%
unique() %>%
stringr::str_replace("package:", "")
undefined <- processed %>%
filter(is.na(environment)) %>%
.$name %>%
unique()
vars <- processed %>%
filter(!grepl("^package:", environment),
!is.na(environment)) %>%
.$name %>%
unique()
list(vars = vars, undefined = undefined, packages = packages)
}
reprex_prefix <- function(code) {
used <- vars_used(parse(text = code))
if (length(used$undefined) > 0) {
message("The following variables were not found and not included ",
"in the prefix: ",
paste(used$undefined, collapse = ", "))
}
if (length(used$packages) > 0) {
ret1 <- paste0("library(", used$packages, ")", collapse = "\n")
} else {
ret1 <- ""
}
ret2 <- paste0(define_vars_(used$vars), collapse = "\n")
paste(ret1, ret2, sep = "\n")
}
library(ggplot2)
x <- 1
y <- 3:8
code <- "
z <- x + y + other
d <- z + 2
print(d + 3)
ggplot(mtcars, aes(wt, mpg)) + geom_point()
mtcars %>%
mutate(wt = wt + 1) %>%
group_by(cyl, am) %>%
summarise(mean(am))
"
cat(reprex_prefix(code))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment