Skip to content

Instantly share code, notes, and snippets.

@sims1253
Created April 26, 2023 11:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sims1253/5469c7fbf923cd9c38df64cf1df9708e to your computer and use it in GitHub Desktop.
Save sims1253/5469c7fbf923cd9c38df64cf1df9708e to your computer and use it in GitHub Desktop.
An example for how a configuration setup for a simulation study can look.
library(brms)
library(bayesim)
library(bayesfam)
library(dplyr)
library(purrr)
set.seed(1235813)
# Sim Setup
RESULT_PATH <- "~/Documents/foo"
NCORES <- 12
CLUSTER_TYPE <- "FORK" # or PSOCK if you have Windows
SEED <- 1339
set.seed(SEED)
options(error = recover) # for easier debugging
DEBUG <- FALSE # unless you want every single simulation step written to disk
DATASET_N <- 200
DATA_GEN_FUN = basedag_data
stan_pars <- list(
backend = "rstan",
cmdstan_path = NULL,
cmdstan_write_path = NULL,
warmup = 500,
iter = 1500,
chains = 1,
init = 0.1
)
metrics <- c(
"v_mean",
"v_sd",
"v_median",
"v_mad",
"v_pos_prob",
"v_quantiles",
"v_bias",
"v_rmse",
"v_mae",
"v_mse",
"v_true_percentile",
"divergent_transitions_rel",
"bad_pareto_ks",
"time_per_sample",
"rhat",
"ess_bulk",
"ess_tail",
"elpd_loo",
"elpd_test",
"rmse_loo",
"rmse_test",
"r2_loo",
"r2_test",
"data_gen",
"fit_gen"
)
VARS_OF_INTEREST = list(list(c("b_x")))
QUANTILES = list(list(seq(0.1, 0.9, length.out = 9)))
# Data Gen Setup
data_generation_configuration <- expand.grid(
z1_x_coef = NA,
z1_y_coef = NA,
z2_y_coef = 0.5,
z3_x_coef = 0.8,
x_z4_coef = NA,
y_z4_coef = NA,
sigma_z1 = 0.5,
sigma_z2 = 0.5,
sigma_z3 = 0.5,
sigma_z4 = 0.5,
sigma_x = 0.5,
data_N = 100,
dataset_N = DATASET_N,
data_family = c(
"gamma",
"weibull",
"lognormal",
"softplusnormal",
"frechet",
"betaprime",
"gompertz"
),
data_link = c(
"log",
"softplus",
"identity"
),
lb = 0.000001,
ub = Inf,
resample = 1.3,
x_y_coef = c(NA, 0),
y_intercept = NA,
sigma_y = NA,
shape = c(
"ramp",
"asymmetric",
"symmetric"
),
noise_sd = 0.1,
stringsAsFactors = FALSE
)
data_generation_configuration <- filter(
data_generation_configuration,
!(data_link == "identity" &
data_family != "lognormal" &
data_family != "softplusnormal")
)
data_generation_configuration <- filter(
data_generation_configuration,
!(data_link != "identity" &
(data_family == "lognormal" | data_family == "softplusnormal"))
)
z1_x_coef_list <- list(
"log" = 0.6,
"softplus" = 1.2
)
z1_y_coef_list <- list(
"log" = 0.8,
"softplus" = 1.2
)
x_z4_coef_list <- list(
"log" = 0.5,
"softplus" = 0.5
)
y_z4_coef_list <- list(
"log" = 1,
"softplus" = 0.5
)
sigma_y_list <- list(
"gamma" = c(1, 10, 40),
"weibull" = c(1, 4, 8),
"lognormal" = c(1, 0.35, 0.15),
"softplusnormal" = c(2, 4, 2),
"frechet" = c(2, 5, 10),
"inverse.gaussian" = c(1, 10, 1000),
"betaprime" = c(1, 20, 50),
"gompertz" = c(0.2, 0.3, 0.6)
)
y_intercept_list <- list(
"log" = log(c(1, 10, 10)),
"softplus" = softplus(c(1, 10, 10))
)
x_y_coef_list <- list(
"log" = list(
"ramp" = 0.5,
"asymmetric" = 0.2,
"symmetric" = 0.1
),
"softplus" = list(
"ramp" = 0.9,
"asymmetric" = 1.4,
"symmetric" = 0.8
)
)
for (i in seq_len(nrow(data_generation_configuration))) {
family <- data_generation_configuration$data_family[[i]]
shape <- data_generation_configuration$shape[[i]]
switch(family,
"lognormal" = link <- "log",
"softplusnormal" = link <- "softplus",
link <- data_generation_configuration$data_link[[i]]
)
data_generation_configuration$z1_x_coef[[i]] <- z1_x_coef_list[[link]]
data_generation_configuration$z1_y_coef[[i]] <- z1_y_coef_list[[link]]
data_generation_configuration$x_z4_coef[[i]] <- x_z4_coef_list[[link]]
data_generation_configuration$y_z4_coef[[i]] <- y_z4_coef_list[[link]]
if (is.na(data_generation_configuration$x_y_coef[[i]])) {
data_generation_configuration$x_y_coef[[i]] <- x_y_coef_list[[link]][[shape]]
}
if (shape == "ramp") {
data_generation_configuration$sigma_y[[i]] <- sigma_y_list[[family]][[1]]
data_generation_configuration$y_intercept[[i]] <- y_intercept_list[[link]][[1]]
}
if (shape == "asymmetric") {
data_generation_configuration$sigma_y[[i]] <- sigma_y_list[[family]][[2]]
data_generation_configuration$y_intercept[[i]] <- y_intercept_list[[link]][[2]]
}
if (shape == "symmetric") {
data_generation_configuration$sigma_y[[i]] <- sigma_y_list[[family]][[3]]
data_generation_configuration$y_intercept[[i]] <- y_intercept_list[[link]][[3]]
}
}
data_generation_configuration$id <- as.numeric(
rownames(data_generation_configuration)
)
# Fit Gen Setup
fit_configuration <- expand.grid(
fit_family = c(
"gompertz",
"gamma",
"weibull",
"lognormal",
"softplusnormal",
"frechet",
"betaprime",
"gaussian"
),
fit_link = c(
"log",
"softplus",
"identity"
),
formula = c(
"y ~ x + z1 + z2",
"y ~ x + z2",
"y ~ x + z1",
"y ~ x + z1 + z2 + z3",
"y ~ x + z1 + z2 + z4"
),
stringsAsFactors = FALSE
)
fit_configuration <- filter(
fit_configuration,
!(fit_link == "identity" &
fit_family != "gaussian" &
fit_family != "lognormal" &
fit_family != "softplusnormal" &
fit_family != "lognormal_custom")
)
fit_configuration <- filter(
fit_configuration,
!(fit_link != "identity" &
(fit_family == "lognormal" |
fit_family == "softplusnormal" |
fit_family == "lognormal_custom"))
)
fit_configuration$prior = c() # should stay empty for this
# Simulation
result_df <- full_simulation(
data_gen_confs = data_generation_configuration,
data_gen_fun = DATA_GEN_FUN,
fit_confs = fit_configuration,
metrics = metrics,
ncores_simulation = NCORES,
cluster_type = CLUSTER_TYPE,
stan_pars = stan_pars,
seed = SEED,
result_path = RESULT_PATH,
debug = DEBUG,
vars_of_interest = VARS_OF_INTEREST,
quantiles = QUANTILES
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment