Skip to content

Instantly share code, notes, and snippets.

@brshallo
Last active October 15, 2020 15:36
Show Gist options
  • Save brshallo/f92a5820030e21cfed8f823a6e1d56e1 to your computer and use it in GitHub Desktop.
Save brshallo/f92a5820030e21cfed8f823a6e1d56e1 to your computer and use it in GitHub Desktop.
Function for combining arbitrary functions across specified 2-way column combinations
library(tidyverse)
c_sort_collapse <- function(...){
c(...) %>%
sort() %>%
str_c(collapse = ".")
}
ready_combinations <- function(df,
...,
associative = FALSE) {
# I. Nest & pivot
df_lists <- df %>%
summarise_all(list) %>%
pivot_longer(cols = everything(),
names_to = "var",
values_to = "vector")
# II. Expand combinations
df_lists_comb <- expand(df_lists,
nesting(var, vector),
nesting(var2 = var, vector2 = vector))
# III. Filter redundancies
df_lists_comb <- df_lists_comb %>%
filter(!(var == var2)) %>%
arrange(var, var2) %>%
select(contains("var"), everything()) %>%
mutate(vars = paste0(var, ".", var2))
if(associative){
df_lists_comb <- df_lists_comb %>%
mutate(vars = map2_chr(.x = var, .y = var2, .f = c_sort_collapse)) %>%
distinct(vars, .keep_all = TRUE)
}
df_lists_comb
}
# Function that, upon being reduced, allows arbitrary operators as input
# Used for step IV.
mutate_map_fun <- function(df, fun = "/", fun_name = NULL){
if(is.null(fun_name)) fun_name <- fun
mutate(df, {{fun_name}} := map2(vector, vector2, !!rlang::sym(fun)))
}
mutate_pairwise <- function(df,
...,
funs = list("/", "-"),
funs_names = NULL,
associative = FALSE,
sep = ".") {
df_cols <- select(df, ...) %>%
mutate_if(is.numeric, as.double)
# I. - III.
df_lists_comb <- ready_combinations(df_cols, ..., associative)
# IV.
if(is.null(funs_names)) funs_names <- funs
new_features <- reduce2(.x = funs,
.y = funs_names,
.f = mutate_map_fun,
.init = df_lists_comb) %>%
# V.
select(-c(vector, vector2)) %>%
pivot_longer(cols = -contains("var")) %>%
mutate(name_vars = str_c(var, name, var2, sep = sep)) %>%
select(name_vars, everything(), -c(var, var2, vars, name)) %>%
pivot_wider(values_from = value,
names_from = name_vars) %>%
unnest(cols = everything())
# VI.
bind_cols(select(df, -one_of(colnames(df_cols))), new_features)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment