Skip to content

Instantly share code, notes, and snippets.

@cimentadaj
Created February 18, 2020 12:21
Show Gist options
  • Save cimentadaj/ac54e7f5b44e8cbcb0979d532c1f4ed3 to your computer and use it in GitHub Desktop.
Save cimentadaj/ac54e7f5b44e8cbcb0979d532c1f4ed3 to your computer and use it in GitHub Desktop.
library(recipes)
library(parsnip)
library(workflows)
spline_cars <-
recipe(mpg ~ ., data = mtcars) %>%
step_ns(disp, deg_free = 10)
bayes_lm <-
linear_reg() %>%
set_engine("stan")
spline_cars_prepped <- prep(spline_cars, mtcars)
bayes_lm_fit <- fit(bayes_lm, mpg ~ ., data = juice(spline_cars_prepped))
car_wflow <-
mtcars %>%
workflow() %>%
add_initial_split(prop = 3/4) %>%
add_vfold_cv(v = 10) %>%
add_recipe(
recipe(mpg ~ .) %>%
step_ns(disp, deg_free = 10)
) %>%
add_model(bayes_lm)
car_wflow %>%
fit()
car_wflow %>%
update_step(
.rcp %>%
step_arrange(cyl) %>%
step_ns(disp, deg_free = 10)
) %>%
fit()
rcp <- function (formula, data = NULL, ...) {
f_funcs <- recipes:::fun_calls(formula)
if (any(f_funcs == "-")) {
rlang::abort("`-` is not allowed in a recipe formula. Use `step_rm()` instead.")
}
## Extract all terms from formula
all_terms <- as.character(attr(terms(formula), "variables"))
all_terms <- setdiff(all_terms, "list")
# Create empty df with column names
data <- rep(list(numeric()), length(all_terms))
names(data) <- all_terms
data <- structure(data, class = "data.frame")
# Continue with normal `recipe`
args <- recipes:::form2args(formula, data)
obj <- recipes:::recipe.data.frame(x = args$x,
formula = NULL,
vars = args$vars,
roles = args$roles)
class(obj) <- c("no_data", class(obj))
obj
}
prep.no_data <- function(x,
training = NULL,
fresh = FALSE,
verbose = FALSE,
retain = TRUE,
strings_as_factors = TRUE,
...) {
if (nrow(x$template) == 0) {
rlang::abort("No data has been specified in the recipe.")
}
class(x) <- setdiff(class(x), "no_data")
## Because we haven't really checked that all vars
## are in the data (since we hijacked `rcp` with an empty df)
## proceed with the normal recipe checks for raising an error
# in case the supplied df is not compatible with the recipe
args <- recipes:::form2args(recipe2formula(x), x$template)
obj <- recipes:::recipe.data.frame(x = args$x,
formula = NULL,
vars = args$vars,
roles = args$roles)
## The only extra thing to add is all the steps the user supplied
## earlier.
obj$steps <- x$steps
prep(obj,
training = training,
fresh = fresh,
verbose = verbose,
retain = retain,
strings_as_factors = strings_as_factors,
...)
}
my_rcp <-
rcp(mpg ~ cyl + am + drat) %>%
step_log(mpg, base = 10) %>%
step_log(drat)
my_rcp$template <- mtcars
prep(my_rcp)
recipe2formula <- function(x) {
x <- summary(x)
x_vars <- x$variable[x$role == "predictor"]
x_vars <- x_vars[!is.na(x_vars)]
if (length(x_vars) == 0)
x_vars <- 1
y_vars <- x$variable[x$role == "outcome"]
y_vars <- y_vars[!is.na(y_vars)]
if (length(y_vars) == 0)
y_vars <- ""
x_vars <- paste0(x_vars, collapse = "+")
y_vars <- paste0(y_vars, collapse = "+")
as.formula(paste(y_vars, x_vars, sep = "~"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment