Created
October 15, 2024 20:41
-
-
Save rubenarslan/2dafc0a4d3cb6deac00afaa8ea6fac37 to your computer and use it in GitHub Desktop.
simulation code with memory leak
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load necessary libraries for data manipulation, visualization, and parallel processing | |
library(tidyverse) # Provides a collection of R packages for data manipulation | |
library(furrr) # Parallel processing using purrr | |
library(dtplyr) | |
# Set up parallel processing plan with 6 workers using multisession | |
plan(multisession(workers = 20)) | |
# Check current memory usage | |
pryr::mem_used() | |
# Set random seed for reproducibility | |
set.seed(05102019) | |
# Create a grid of parameters for simulations | |
parameters <- expand_grid( | |
Iteration = 1, | |
N = 500, # Number of individuals | |
num_traits = 35, # Number of traits/preferences | |
trait_pref_correlation = c(0, 0.4), | |
trait_mean_weight = c(0, 0.3, 0.5), | |
pref_mean_weight = c(0, 0.3, 0.5), | |
great_partner_weight = c(0, 0.3, 0.5), | |
high_standards_weight = c(0, 0.3, 0.5), | |
trait_intercept = c(-1, 0, 1), | |
pref_intercept = c(-1, 0, 1), | |
normative_pref_strength_fun = c("rexp", "logrnorm"), | |
normative_pref_strength_rate = 1, | |
random_responding_sd = c(0, 1, 2), | |
likert_noise_sd = c(0.1, 0.3), | |
true_liking_filter = c(-Inf, 0, 2) | |
) %>% | |
arrange(runif(n())) # Randomize the order of parameter combinations | |
# Display the number of parameter combinations | |
nrow(parameters) | |
# | |
# # Parameters modeled on the original data | |
p <- list( | |
N = 5000, | |
num_traits = 35, | |
trait_intercept = 0, | |
pref_intercept = 0, | |
trait_pref_correlation = 0.4, | |
trait_mean_weight = 0.5, | |
pref_mean_weight = 0.5, | |
great_partner_weight = 0.3, | |
high_standards_weight = 0.3, | |
normative_pref_strength_fun = "rexp", | |
normative_pref_strength_rate = 1.5, | |
random_responding_sd = 2, | |
likert_noise_sd = 0.5, | |
true_liking_filter = -Inf | |
) | |
# Function to extract the upper triangle of a matrix | |
get_upper_tri <- function(x) { | |
x[upper.tri(x)] | |
} | |
# Function to run a single iteration of the simulation | |
run_simulation <- function(...) { | |
# Convert input parameters to a list | |
p <- list(...) | |
# Define function for a log-transformed normal dist (left-skewed) | |
logrnorm <- function(n, rate) { | |
log(abs(rnorm(n, mean = 0, sd = rate))) | |
} | |
# Function to convert continuous values to an 11-point Likert scale | |
to_likert_11 <- function(x, noise_sd) { | |
x <- round(6 + 4 * (x + p$likert_noise_sd * rnorm(length(x), sd = exp(noise_sd)))) | |
x[x < 1] <- 1 | |
x[x > 11] <- 11 | |
x | |
} | |
scale <- function(...) { base::scale(...)[,1] } | |
# Adjust N to ensure the desired number of individuals after filtering | |
initial_N <- round(p$N / (1 - pnorm(p$true_liking_filter))) | |
# Generate data frame with individual traits and preferences | |
df <- tibble( | |
individual_id = rep(1:initial_N, each = p$num_traits), | |
trait_id = rep(1:p$num_traits, times = initial_N), | |
# Generate random responding noise for each individual | |
random_responding = rep(rnorm(initial_N, sd = p$random_responding_sd), each = p$num_traits), | |
# Generate trait means for each trait | |
trait_mean = rep(rnorm(p$num_traits), times = initial_N), | |
# Generate individual-level variables | |
great_partner = rep(rnorm(initial_N, mean = 0), each = p$num_traits), | |
high_standards = rep(rnorm(initial_N, mean = 0), each = p$num_traits), | |
# Generate preference means influenced by trait means | |
pref_mean = rep(rnorm(p$num_traits), times = initial_N) + trait_mean, | |
# Generate normative preference strengths | |
normative_pref_strength = rep(get(p$normative_pref_strength_fun)(p$num_traits, rate = p$normative_pref_strength_rate), times = initial_N) | |
) %>% | |
lazy_dt() %>% | |
# Generate trait and preference values | |
mutate( | |
trait_value = p$trait_intercept + | |
p$trait_mean_weight * trait_mean + | |
p$great_partner_weight * great_partner + | |
rnorm(n()), # Add random noise | |
pref_value = p$pref_intercept + | |
p$high_standards_weight * high_standards + | |
p$pref_mean_weight * pref_mean + | |
p$trait_pref_correlation * trait_value + | |
rnorm(n()) # Add random noise | |
) %>% | |
# Convert trait and preference values to Likert scale | |
group_by(trait_id) %>% | |
mutate( | |
trait_likert = to_likert_11(trait_value, random_responding), | |
pref_likert = to_likert_11(pref_value, random_responding) | |
) %>% | |
ungroup() | |
# Compute true liking scores for each individual | |
liking <- df %>% | |
group_by(individual_id) %>% | |
summarise( | |
random_responding = first(random_responding), | |
true_liking = sum(trait_value * normative_pref_strength) / sqrt(p$num_traits) | |
) %>% | |
# Standardize true liking scores | |
mutate(true_liking = scale(true_liking)) %>% | |
# Filter individuals based on true liking threshold | |
filter(true_liking > p$true_liking_filter) %>% | |
# Convert true liking to Likert scale | |
mutate(liking_likert = to_likert_11(true_liking/2, (random_responding/3))) | |
# Subset df to only include individuals who passed the true liking filter | |
df <- liking %>% select(individual_id) %>% | |
left_join(df, by = "individual_id") %>% | |
collect() | |
# Compute summary statistics for traits and preferences | |
traits <- df %>% | |
group_by(trait_id) %>% | |
summarise( | |
trait_mean = mean(trait_likert, na.rm = TRUE), | |
trait_sd = sd(trait_likert, na.rm = TRUE), | |
pref_mean = mean(pref_likert, na.rm = TRUE), | |
pref_sd = sd(pref_likert, na.rm = TRUE) | |
) | |
# Reshape data to wide format for correlation analysis | |
traits_prefs_wide <- df %>% | |
select(individual_id, trait_id, trait_likert, pref_likert) %>% | |
pivot_wider(names_from = trait_id, values_from = c(trait_likert, pref_likert)) %>% | |
collect() | |
# Compute average correlations among preferences and traits | |
avg_pref_cor <- traits_prefs_wide %>% | |
select(starts_with("pref_likert_")) %>% | |
cor() %>% | |
get_upper_tri() | |
avg_trait_cor <- traits_prefs_wide %>% | |
select(starts_with("trait_likert_")) %>% | |
cor() %>% | |
get_upper_tri() | |
# Compute pattern metrics | |
pattern_metric <- df %>% | |
group_by(trait_id) %>% | |
mutate( | |
trait_mean = mean(trait_likert, na.rm = TRUE), | |
trait_norm = trait_likert - trait_mean, | |
pref_mean = mean(pref_likert, na.rm = TRUE), | |
pref_norm = pref_likert - pref_mean | |
) %>% | |
group_by(individual_id) %>% | |
summarise( | |
corrected_pattern_metric = cor(trait_norm, pref_norm, use = 'pairwise.complete.obs')[1], | |
norm_pattern_metric = cor(trait_likert, pref_mean, use = 'pairwise.complete.obs')[1], | |
trait_mean = mean(trait_likert), | |
pref_mean = mean(pref_likert), | |
raw_pattern_metric = cor(trait_likert, pref_likert, use = 'pairwise.complete.obs')[1] | |
) %>% | |
# Apply Fisher's z-transformation to correlation coefficients | |
mutate( | |
corrected_pattern_metric = psych::fisherz(corrected_pattern_metric), | |
norm_pattern_metric = psych::fisherz(norm_pattern_metric), | |
raw_pattern_metric = psych::fisherz(raw_pattern_metric) | |
) %>% | |
# Handle infinite values resulting from Fisher transformation | |
mutate( | |
raw_pattern_metric = if_else(is.finite(raw_pattern_metric), raw_pattern_metric, NA_real_), | |
norm_pattern_metric = if_else(is.finite(norm_pattern_metric), norm_pattern_metric, NA_real_), | |
corrected_pattern_metric = if_else(is.finite(corrected_pattern_metric), corrected_pattern_metric, NA_real_) | |
) | |
# Combine pattern metrics with liking scores and wide data | |
wide_df <- pattern_metric %>% | |
left_join(liking %>% collect(), by = "individual_id") %>% | |
left_join(traits_prefs_wide, by = "individual_id") | |
# Linear models to compute incremental variance explained by corrected pattern metric | |
m1 <- lm(liking_likert ~ ., wide_df %>% select(liking_likert, starts_with("trait_likert_"))) | |
m2 <- lm(liking_likert ~ ., wide_df %>% select(liking_likert, corrected_pattern_metric, starts_with("trait_likert_"))) | |
incremental_var_explained_by_cpm <- diff(sapply(list(m1, m2), function(x) summary(x)$adj.r.squared)) | |
# Compile results | |
results <- cbind( | |
as_tibble(p), # Return parameters | |
final_N = nrow(wide_df), | |
# Proportion of responses at ceiling for preferences and traits | |
df %>% summarise( | |
pref_at_ceiling = mean(pref_likert == 11), | |
trait_at_ceiling = mean(trait_likert == 11) | |
), | |
wide_df %>% summarise( | |
liking_at_ceiling = mean(liking_likert == 11), | |
liking_at_floor = mean(liking_likert == 1) | |
), | |
# Correlation between random responding and pattern metrics | |
df %>% | |
select(individual_id, random_responding) %>% | |
left_join(pattern_metric, by = "individual_id") %>% | |
summarise( | |
cor_random_responding_cpm = cor(random_responding, corrected_pattern_metric, use = 'pairwise.complete.obs'), | |
cor_random_responding_rpm = cor(random_responding, raw_pattern_metric, use = 'pairwise.complete.obs') | |
), | |
# Average correlation between preferences and traits across traits | |
df %>% | |
group_by(trait_id) %>% | |
summarise(avg_pref_trait_cor = cor(pref_likert, trait_likert, use = 'pairwise.complete.obs')) %>% | |
summarise(avg_pref_trait_cor = mean(avg_pref_trait_cor, na.rm = TRUE)), | |
# Standard deviations of trait and preference means | |
traits %>% summarise( | |
sd_norm_prefs = sd(pref_mean), | |
sd_traits = sd(trait_mean) | |
), | |
# Mean and SD of correlations among traits and preferences | |
tibble( | |
trait_cor_mean = mean(avg_trait_cor, na.rm = TRUE), | |
trait_cor_sd = sd(avg_trait_cor, na.rm = TRUE), | |
pref_cor_mean = mean(avg_pref_cor, na.rm = TRUE), | |
pref_cor_sd = sd(avg_pref_cor, na.rm = TRUE) | |
), | |
# Incremental variance explained by corrected pattern metric | |
incremental_var_explained_by_cpm = incremental_var_explained_by_cpm, | |
pattern_metric %>% | |
summarise( | |
cor_corrected_trait_mean = cor(corrected_pattern_metric, trait_mean, use = 'pairwise.complete.obs') | |
), | |
# Correlations between pattern metrics and liking | |
wide_df %>% | |
summarise( | |
avg_cor_corrected = mean(corrected_pattern_metric, na.rm = TRUE), | |
cor_corrected_latent = cor(corrected_pattern_metric, true_liking, use = 'pairwise.complete.obs'), | |
cor_corrected = cor(corrected_pattern_metric, liking_likert, use = 'pairwise.complete.obs'), | |
cor_test_p = broom::tidy(cor.test(corrected_pattern_metric, liking_likert))$p.value[1], | |
cor_raw = cor(raw_pattern_metric, liking_likert, use = 'pairwise.complete.obs'), | |
cor_norm = cor(norm_pattern_metric, liking_likert, use = 'pairwise.complete.obs') | |
) | |
) | |
write.table(results, file = "sim_results.csv", sep = ",", | |
row.names = FALSE, col.names = !file.exists("sim_results.csv"), append = TRUE) | |
as.data.frame(results) | |
} | |
# Test the run_simulation function with parameter set p | |
system.time({ | |
p %>% pmap(run_simulation) | |
}) | |
# Function to safely run simulation and capture errors | |
safe_run_simulation <- function(...) { | |
tryCatch({ | |
result <- run_simulation(...) | |
cbind(success = TRUE, result) | |
}, error = function(e) { | |
cbind(success = FALSE, as_tibble(list(...))) | |
}) | |
} | |
# Check memory usage before simulations | |
pryr::mem_used() | |
# Run simulations in parallel over the parameter grid | |
SimulationResults <- parameters %>% | |
pmap( | |
safe_run_simulation, | |
.progress = TRUE | |
# .options = furrr_options(seed = 20191005) | |
) %>% | |
# Combine all results into a single data frame | |
bind_rows() | |
# Check memory usage after simulations | |
pryr::mem_used() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment