library(future)
library(furrr)
library(kit)
#> Attaching kit 0.0.15 (OPENMP enabled using 1 thread)
library(tidyverse)
## reprex data
set.seed(1)
vars <- paste0(letters,1:10)[1:15]
bestVars <- combn(vars, 5, simplify = F)
df <- data.frame(
matrix(data = rnorm(50000*length(vars),200,500), nrow = 50000, ncol = length(vars))
)
names(df) <- vars
df$value <- rnorm(n = nrow(df), 350, 300)
df <- df %>%
dplyr::select(value,everything(.))
df <- lapply(split.default(x = df, names(df)), function(x) x[[1]])
list2env(df, globalenv())
#> <environment: R_GlobalEnv>
rm(df)
run_sim_in_par <- function(vars_to_sim)
{
sampled_rows <- sample(x = 1:length(value), size = 50, replace = F)
varname <- paste(names(vars_to_sim), collapse = "*")
best <- Reduce(vars_to_sim, f = '*')[sampled_rows]
row_idx <- kit::topn(best, n = 5, decreasing = T, hasna = FALSE, index = TRUE)
best_row_value <- value[sampled_rows][row_idx]
sim <- data.frame(var = varname,
mean_value = mean(best_row_value))
return(sim)
}
## working when explicitly declaring .x
x <- bestVars[[1]]
simulated_res <- run_sim_in_par(vars_to_sim = mget(x))
RNGkind("L'Ecuyer-CMRG")
set.seed(123)
(s <- .Random.seed)
#> [1] 10407 1806547166 -983674937 643431772 1162448557 -959247990 -133913213
seeds <- replicate(length(bestVars), parallel::nextRNGStream(s), simplify = FALSE)
stime <- Sys.time()
simulated_res <- lapply(seq_along(bestVars), function(i) future(run_sim_in_par(vars_to_sim = mget(bestVars[[i]], inherits =TRUE)), seed = seeds[[i]], globals = bestVars[[i]]))
simulated_res <- lapply(simulated_res, future::value)
simulated_res <- dplyr::bind_rows(simulated_res)
tail(simulated_res)
#> var mean_value
#> 2998 j10*k1*l2*m3*n4 287.6805
#> 2999 j10*k1*l2*m3*o5 291.9474
#> 3000 j10*k1*l2*n4*o5 321.1213
#> 3001 j10*k1*m3*n4*o5 373.6220
#> 3002 j10*l2*m3*n4*o5 219.9732
#> 3003 k1*l2*m3*n4*o5 221.2340
Sys.time()-stime
#> Time difference of 15.49343 secs
Created on 2024-01-14 with reprex v2.0.2