# Simulation code with memory leak
# Gist by @rubenarslan, created October 15, 2024
# Load necessary libraries for data manipulation, visualization, and parallel processing
library(tidyverse) # Provides a collection of R packages for data manipulation
library(furrr) # Parallel processing using purrr
library(dtplyr) # Translate dplyr pipelines to data.table for speed
# Set up parallel processing plan with 20 workers using multisession
plan(multisession(workers = 20))
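# Optional sanity check (not required for the simulation): confirm the machine
# actually has enough cores for 20 multisession workers before committing to them.
future::availableCores()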
# Check current memory usage
pryr::mem_used()
# Set random seed for reproducibility
set.seed(05102019)
# Create a grid of parameters for simulations
parameters <- expand_grid(
  Iteration = 1,
  N = 500, # Number of individuals
  num_traits = 35, # Number of traits/preferences
  trait_pref_correlation = c(0, 0.4),
  trait_mean_weight = c(0, 0.3, 0.5),
  pref_mean_weight = c(0, 0.3, 0.5),
  great_partner_weight = c(0, 0.3, 0.5),
  high_standards_weight = c(0, 0.3, 0.5),
  trait_intercept = c(-1, 0, 1),
  pref_intercept = c(-1, 0, 1),
  normative_pref_strength_fun = c("rexp", "logrnorm"),
  normative_pref_strength_rate = 1,
  random_responding_sd = c(0, 1, 2),
  likert_noise_sd = c(0.1, 0.3),
  true_liking_filter = c(-Inf, 0, 2)
) %>%
  arrange(runif(n())) # Randomize the order of parameter combinations
# Display the number of parameter combinations
nrow(parameters)
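# Optional: draw a small random subset of the grid (the name parameters_dryrun is
# arbitrary) for a quick dry run before committing to the full sweep.
parameters_dryrun <- parameters %>% slice_sample(n = 10)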
# Parameters modeled on the original data
p <- list(
  N = 5000,
  num_traits = 35,
  trait_intercept = 0,
  pref_intercept = 0,
  trait_pref_correlation = 0.4,
  trait_mean_weight = 0.5,
  pref_mean_weight = 0.5,
  great_partner_weight = 0.3,
  high_standards_weight = 0.3,
  normative_pref_strength_fun = "rexp",
  normative_pref_strength_rate = 1.5,
  random_responding_sd = 2,
  likert_noise_sd = 0.5,
  true_liking_filter = -Inf
)
# Function to extract the upper triangle of a matrix
get_upper_tri <- function(x) {
  x[upper.tri(x)]
}
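# Quick illustration (toy data): for a 3-variable correlation matrix,
# get_upper_tri() returns its 3 unique pairwise correlations.
get_upper_tri(cor(matrix(rnorm(300), ncol = 3)))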
# Function to run a single iteration of the simulation
run_simulation <- function(...) {
  # Convert input parameters to a list
  p <- list(...)
  # Left-skewed alternative to rexp: log of the absolute value of a normal draw
  logrnorm <- function(n, rate) {
    log(abs(rnorm(n, mean = 0, sd = rate)))
  }
  # Convert continuous values to an 11-point Likert scale:
  # add noise with SD likert_noise_sd * exp(noise_sd), rescale, round, clamp to 1-11
  to_likert_11 <- function(x, noise_sd) {
    x <- round(6 + 4 * (x + p$likert_noise_sd * rnorm(length(x), sd = exp(noise_sd))))
    x[x < 1] <- 1
    x[x > 11] <- 11
    x
  }
  # Drop the matrix attributes that base::scale() returns, keep a plain vector
  scale <- function(...) { base::scale(...)[,1] }
  # Inflate N so that, in expectation, p$N individuals remain after the
  # true-liking filter (e.g. with true_liking_filter = 0, about half are dropped)
  initial_N <- round(p$N / (1 - pnorm(p$true_liking_filter)))
  # Generate data frame with individual traits and preferences
  df <- tibble(
    individual_id = rep(1:initial_N, each = p$num_traits),
    trait_id = rep(1:p$num_traits, times = initial_N),
    # Generate random responding noise for each individual
    random_responding = rep(rnorm(initial_N, sd = p$random_responding_sd), each = p$num_traits),
    # Generate trait means for each trait
    trait_mean = rep(rnorm(p$num_traits), times = initial_N),
    # Generate individual-level variables
    great_partner = rep(rnorm(initial_N, mean = 0), each = p$num_traits),
    high_standards = rep(rnorm(initial_N, mean = 0), each = p$num_traits),
    # Generate preference means influenced by trait means
    pref_mean = rep(rnorm(p$num_traits), times = initial_N) + trait_mean,
    # Generate normative preference strengths
    normative_pref_strength = rep(get(p$normative_pref_strength_fun)(p$num_traits, rate = p$normative_pref_strength_rate), times = initial_N)
  ) %>%
    lazy_dt() %>%
    # Generate trait and preference values
    mutate(
      trait_value = p$trait_intercept +
        p$trait_mean_weight * trait_mean +
        p$great_partner_weight * great_partner +
        rnorm(n()), # Add random noise
      pref_value = p$pref_intercept +
        p$high_standards_weight * high_standards +
        p$pref_mean_weight * pref_mean +
        p$trait_pref_correlation * trait_value +
        rnorm(n()) # Add random noise
    ) %>%
    # Convert trait and preference values to Likert scale
    group_by(trait_id) %>%
    mutate(
      trait_likert = to_likert_11(trait_value, random_responding),
      pref_likert = to_likert_11(pref_value, random_responding)
    ) %>%
    ungroup()
  # Compute true liking scores for each individual
  liking <- df %>%
    group_by(individual_id) %>%
    summarise(
      random_responding = first(random_responding),
      true_liking = sum(trait_value * normative_pref_strength) / sqrt(p$num_traits)
    ) %>%
    # Standardize true liking scores
    mutate(true_liking = scale(true_liking)) %>%
    # Filter individuals based on true liking threshold
    filter(true_liking > p$true_liking_filter) %>%
    # Convert true liking to Likert scale
    mutate(liking_likert = to_likert_11(true_liking/2, (random_responding/3)))
  # Subset df to only include individuals who passed the true liking filter
  df <- liking %>%
    select(individual_id) %>%
    left_join(df, by = "individual_id") %>%
    collect()
  # Compute summary statistics for traits and preferences
  traits <- df %>%
    group_by(trait_id) %>%
    summarise(
      trait_mean = mean(trait_likert, na.rm = TRUE),
      trait_sd = sd(trait_likert, na.rm = TRUE),
      pref_mean = mean(pref_likert, na.rm = TRUE),
      pref_sd = sd(pref_likert, na.rm = TRUE)
    )
  # Reshape data to wide format for correlation analysis
  traits_prefs_wide <- df %>%
    select(individual_id, trait_id, trait_likert, pref_likert) %>%
    pivot_wider(names_from = trait_id, values_from = c(trait_likert, pref_likert)) %>%
    collect()
  # Compute average correlations among preferences and traits
  avg_pref_cor <- traits_prefs_wide %>%
    select(starts_with("pref_likert_")) %>%
    cor() %>%
    get_upper_tri()
  avg_trait_cor <- traits_prefs_wide %>%
    select(starts_with("trait_likert_")) %>%
    cor() %>%
    get_upper_tri()
  # Compute pattern metrics
  pattern_metric <- df %>%
    group_by(trait_id) %>%
    mutate(
      trait_mean = mean(trait_likert, na.rm = TRUE),
      trait_norm = trait_likert - trait_mean,
      pref_mean = mean(pref_likert, na.rm = TRUE),
      pref_norm = pref_likert - pref_mean
    ) %>%
    group_by(individual_id) %>%
    summarise(
      corrected_pattern_metric = cor(trait_norm, pref_norm, use = 'pairwise.complete.obs')[1],
      norm_pattern_metric = cor(trait_likert, pref_mean, use = 'pairwise.complete.obs')[1],
      trait_mean = mean(trait_likert),
      pref_mean = mean(pref_likert),
      raw_pattern_metric = cor(trait_likert, pref_likert, use = 'pairwise.complete.obs')[1]
    ) %>%
    # Apply Fisher's z-transformation to correlation coefficients
    mutate(
      corrected_pattern_metric = psych::fisherz(corrected_pattern_metric),
      norm_pattern_metric = psych::fisherz(norm_pattern_metric),
      raw_pattern_metric = psych::fisherz(raw_pattern_metric)
    ) %>%
    # Handle infinite values resulting from Fisher transformation
    mutate(
      raw_pattern_metric = if_else(is.finite(raw_pattern_metric), raw_pattern_metric, NA_real_),
      norm_pattern_metric = if_else(is.finite(norm_pattern_metric), norm_pattern_metric, NA_real_),
      corrected_pattern_metric = if_else(is.finite(corrected_pattern_metric), corrected_pattern_metric, NA_real_)
    )
  # Combine pattern metrics with liking scores and wide data
  wide_df <- pattern_metric %>%
    left_join(liking %>% collect(), by = "individual_id") %>%
    left_join(traits_prefs_wide, by = "individual_id")
  # Linear models to compute incremental variance explained by corrected pattern metric
  m1 <- lm(liking_likert ~ ., wide_df %>% select(liking_likert, starts_with("trait_likert_")))
  m2 <- lm(liking_likert ~ ., wide_df %>% select(liking_likert, corrected_pattern_metric, starts_with("trait_likert_")))
  incremental_var_explained_by_cpm <- diff(sapply(list(m1, m2), function(x) summary(x)$adj.r.squared))
  # Compile results
  results <- cbind(
    as_tibble(p), # Return parameters
    final_N = nrow(wide_df),
    # Proportion of responses at ceiling for preferences and traits
    df %>% summarise(
      pref_at_ceiling = mean(pref_likert == 11),
      trait_at_ceiling = mean(trait_likert == 11)
    ),
    wide_df %>% summarise(
      liking_at_ceiling = mean(liking_likert == 11),
      liking_at_floor = mean(liking_likert == 1)
    ),
    # Correlation between random responding and pattern metrics
    df %>%
      select(individual_id, random_responding) %>%
      left_join(pattern_metric, by = "individual_id") %>%
      summarise(
        cor_random_responding_cpm = cor(random_responding, corrected_pattern_metric, use = 'pairwise.complete.obs'),
        cor_random_responding_rpm = cor(random_responding, raw_pattern_metric, use = 'pairwise.complete.obs')
      ),
    # Average correlation between preferences and traits across traits
    df %>%
      group_by(trait_id) %>%
      summarise(avg_pref_trait_cor = cor(pref_likert, trait_likert, use = 'pairwise.complete.obs')) %>%
      summarise(avg_pref_trait_cor = mean(avg_pref_trait_cor, na.rm = TRUE)),
    # Standard deviations of trait and preference means
    traits %>% summarise(
      sd_norm_prefs = sd(pref_mean),
      sd_traits = sd(trait_mean)
    ),
    # Mean and SD of correlations among traits and preferences
    tibble(
      trait_cor_mean = mean(avg_trait_cor, na.rm = TRUE),
      trait_cor_sd = sd(avg_trait_cor, na.rm = TRUE),
      pref_cor_mean = mean(avg_pref_cor, na.rm = TRUE),
      pref_cor_sd = sd(avg_pref_cor, na.rm = TRUE)
    ),
    # Incremental variance explained by corrected pattern metric
    incremental_var_explained_by_cpm = incremental_var_explained_by_cpm,
    pattern_metric %>%
      summarise(
        cor_corrected_trait_mean = cor(corrected_pattern_metric, trait_mean, use = 'pairwise.complete.obs')
      ),
    # Correlations between pattern metrics and liking
    wide_df %>%
      summarise(
        avg_cor_corrected = mean(corrected_pattern_metric, na.rm = TRUE),
        cor_corrected_latent = cor(corrected_pattern_metric, true_liking, use = 'pairwise.complete.obs'),
        cor_corrected = cor(corrected_pattern_metric, liking_likert, use = 'pairwise.complete.obs'),
        cor_test_p = broom::tidy(cor.test(corrected_pattern_metric, liking_likert))$p.value[1],
        cor_raw = cor(raw_pattern_metric, liking_likert, use = 'pairwise.complete.obs'),
        cor_norm = cor(norm_pattern_metric, liking_likert, use = 'pairwise.complete.obs')
      )
  )
  # Append this run's results to a CSV (write the header only if the file does not yet exist)
  write.table(results, file = "sim_results.csv", sep = ",",
              row.names = FALSE, col.names = !file.exists("sim_results.csv"), append = TRUE)
  # Return the results row
  as.data.frame(results)
}
# Test the run_simulation function with parameter set p
system.time({
  p %>% pmap(run_simulation)
})
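# Optional leak check (a sketch; p_small is just a downsized copy of p): repeat a
# small run several times and watch whether pryr::mem_used() keeps climbing.
# Note that each run appends one more row to sim_results.csv.
p_small <- modifyList(p, list(N = 500))
mem_trace <- purrr::map_dbl(1:5, function(i) {
  p_small %>% pmap(run_simulation)
  as.numeric(pryr::mem_used())
})
mem_trace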
# Function to safely run simulation and capture errors
safe_run_simulation <- function(...) {
  tryCatch({
    result <- run_simulation(...)
    cbind(success = TRUE, result)
  }, error = function(e) {
    cbind(success = FALSE, as_tibble(list(...)))
  })
}
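# Optional smoke test (a sketch): run the wrapper on the first parameter row to
# check that it returns a single bindable row (this also appends to sim_results.csv).
parameters %>% slice(1) %>% pmap(safe_run_simulation)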
# Check memory usage before simulations
pryr::mem_used()
# Run simulations over the parameter grid
# (note: purrr::pmap runs sequentially; to actually use the multisession workers
# set up above, swap in furrr::future_pmap and re-enable the .options seed line)
SimulationResults <- parameters %>%
  pmap(
    safe_run_simulation,
    .progress = TRUE
    # .options = furrr_options(seed = 20191005)
  ) %>%
  # Combine all results into a single data frame
  bind_rows()
# Check memory usage after simulations
pryr::mem_used()
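# Quick checks on the combined results (sketch): how many parameter combinations
# errored out inside safe_run_simulation(), and the row count of the on-disk copy
# (the name sim_results_disk is arbitrary).
SimulationResults %>% count(success)
sim_results_disk <- read.csv("sim_results.csv")
nrow(sim_results_disk)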