Skip to content

Instantly share code, notes, and snippets.

@jan-glx
Last active January 14, 2024 20:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jan-glx/ade50aa60b586d6d2400eb90cc8baf8c to your computer and use it in GitHub Desktop.
Save jan-glx/ade50aa60b586d6d2400eb90cc8baf8c to your computer and use it in GitHub Desktop.
library(future)
library(furrr)
library(kit)
#> Attaching kit 0.0.15 (OPENMP enabled using 1 thread)
library(tidyverse)

## reprex data
set.seed(1)
vars <- paste0(letters,1:10)[1:15]
bestVars <- combn(vars, 5, simplify = F)
df <- data.frame(
  matrix(data = rnorm(50000*length(vars),200,500), nrow = 50000, ncol = length(vars))
)
names(df) <- vars
df$value <- rnorm(n = nrow(df), 350, 300)
df <- df %>%
  dplyr::select(value,everything(.))
df <- lapply(split.default(x = df, names(df)), function(x) x[[1]])

list2env(df, globalenv())
#> <environment: R_GlobalEnv>

rm(df)

run_sim_in_par <- function(vars_to_sim)
{
  sampled_rows <- sample(x = 1:length(value), size = 50, replace = F)
  varname <- paste(names(vars_to_sim), collapse = "*")
  best <- Reduce(vars_to_sim, f = '*')[sampled_rows]
  row_idx <- kit::topn(best, n = 5, decreasing = T, hasna = FALSE, index = TRUE)
  
  best_row_value <- value[sampled_rows][row_idx]
  
  sim <- data.frame(var = varname,
                    mean_value = mean(best_row_value))
  return(sim)
}

## working when explicitly declaring .x
x <- bestVars[[1]]
simulated_res <- run_sim_in_par(vars_to_sim = mget(x))

RNGkind("L'Ecuyer-CMRG")
set.seed(123)
(s <- .Random.seed)
#> [1]      10407 1806547166 -983674937  643431772 1162448557 -959247990 -133913213
seeds <- replicate(length(bestVars), parallel::nextRNGStream(s), simplify = FALSE)

stime <- Sys.time()
simulated_res <- lapply(seq_along(bestVars), function(i) future(run_sim_in_par(vars_to_sim = mget(bestVars[[i]], inherits =TRUE)), seed  = seeds[[i]],  globals = bestVars[[i]]))
simulated_res <- lapply(simulated_res, future::value)
simulated_res <- dplyr::bind_rows(simulated_res)
tail(simulated_res)
#>                  var mean_value
#> 2998 j10*k1*l2*m3*n4   287.6805
#> 2999 j10*k1*l2*m3*o5   291.9474
#> 3000 j10*k1*l2*n4*o5   321.1213
#> 3001 j10*k1*m3*n4*o5   373.6220
#> 3002 j10*l2*m3*n4*o5   219.9732
#> 3003  k1*l2*m3*n4*o5   221.2340
Sys.time()-stime
#> Time difference of 15.49343 secs

Created on 2024-01-14 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment