Function for combining arbitrary functions across specified 2-way column combinations
library(tidyverse) | |
c_sort_collapse <- function(...){ | |
c(...) %>% | |
sort() %>% | |
str_c(collapse = ".") | |
} | |
ready_combinations <- function(df, | |
..., | |
associative = FALSE) { | |
# I. Nest & pivot | |
df_lists <- df %>% | |
summarise_all(list) %>% | |
pivot_longer(cols = everything(), | |
names_to = "var", | |
values_to = "vector") | |
# II. Expand combinations | |
df_lists_comb <- expand(df_lists, | |
nesting(var, vector), | |
nesting(var2 = var, vector2 = vector)) | |
# III. Filter redundancies | |
df_lists_comb <- df_lists_comb %>% | |
filter(!(var == var2)) %>% | |
arrange(var, var2) %>% | |
select(contains("var"), everything()) %>% | |
mutate(vars = paste0(var, ".", var2)) | |
if(associative){ | |
df_lists_comb <- df_lists_comb %>% | |
mutate(vars = map2_chr(.x = var, .y = var2, .f = c_sort_collapse)) %>% | |
distinct(vars, .keep_all = TRUE) | |
} | |
df_lists_comb | |
} | |
# Function that, upon being reduced, allows arbitrary operators as input | |
# Used for step IV. | |
mutate_map_fun <- function(df, fun = "/", fun_name = NULL){ | |
if(is.null(fun_name)) fun_name <- fun | |
mutate(df, {{fun_name}} := map2(vector, vector2, !!rlang::sym(fun))) | |
} | |
mutate_pairwise <- function(df, | |
..., | |
funs = list("/", "-"), | |
funs_names = NULL, | |
associative = FALSE, | |
sep = ".") { | |
df_cols <- select(df, ...) %>% | |
mutate_if(is.numeric, as.double) | |
# I. - III. | |
df_lists_comb <- ready_combinations(df_cols, ..., associative) | |
# IV. | |
if(is.null(funs_names)) funs_names <- funs | |
new_features <- reduce2(.x = funs, | |
.y = funs_names, | |
.f = mutate_map_fun, | |
.init = df_lists_comb) %>% | |
# V. | |
select(-c(vector, vector2)) %>% | |
pivot_longer(cols = -contains("var")) %>% | |
mutate(name_vars = str_c(var, name, var2, sep = sep)) %>% | |
select(name_vars, everything(), -c(var, var2, vars, name)) %>% | |
pivot_wider(values_from = value, | |
names_from = name_vars) %>% | |
unnest(cols = everything()) | |
# VI. | |
bind_cols(select(df, -one_of(colnames(df_cols))), new_features) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment