Last active
July 24, 2022 05:08
-
-
Save HaiyangJin/097e33a79597ec73bcbf419c428086c1 to your computer and use it in GitHub Desktop.
The second version of function to simulate data for linear mixed models (https://haiyangjin.github.io/2020/09/simulate-data-v2/).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function (version 2) to simulate the data set for fitting linear mixed models | |
simudata_lmm2 <- function (params=list()) { | |
# This function simulates a dataset for fitting linear mixed mode. Please make sure library(tidyverse) is installed properly. | |
# A more detailed explanation could be found [here](https://haiyangjin.github.io/2020/09/simulate-data-v2/) | |
# Created by Haiyang Jin (https://haiyangjin.github.io/) | |
# | |
# Usage: | |
# # You may set up any parameter in P (see below the default parameters), otherwise the default parameters will be used. | |
# devtools::source_gist("https://gist.github.com/HaiyangJin/097e33a79597ec73bcbf419c428086c1") # OR | |
# devtools::source_gist("https://github.com/HaiyangJin/Website-Shared-Code/blob/master/simulate_data_lmm_v2.R?raw=TRUE") | |
# simulation <- simudata_lmm2() # will use the default parameters | |
# # There are three outputs: | |
# # simudata: the simulated data file. | |
# # population: the true values of the six parameters when `contr.sdif` is used (P$alpha, P$beta_ori, P$beta_ml, P$beta_hm, P$beta_ori_ml, P$beta_ori_hm). | |
# # params : all the parameters used for the simulation (i.e., P in this function). | |
# load library | |
library(tidyverse) | |
############ set the default parameters ############ | |
P <- list( | |
N_subj = 30, | |
N_item = 30, | |
IV1 = c("upright", "inverted"), # orientation | |
IV2 = c("low", "medium", "high"), # spatial frequency | |
# define the population (fixed effects) parameters | |
alpha = -4, # the grand mean | |
beta_ori = -2, # orientation: inverted - upright | |
beta_ml = -1, # spatial frequency: medium - low | |
beta_hm = -.5, # spatial frequency: high - medium | |
beta_ori_ml = 1, # interaction between orientation and (medium-low) | |
beta_ori_hm = .5, # interaction between orientation and (high-medium) | |
# the sigma (residuals) | |
sigma = 2, | |
# by-subject random effects | |
alpha_u_sd = 2, # sd of the by-subject random intercepts of orientation | |
beta_ori_u_sd = .5, # sd of the by-subject random slopes of orientation | |
beta_ml_u_sd = .5, # sd of the by-subject random slopes of medium - low | |
beta_hm_u_sd = .5, # sd of the by-subject random slopes of high - medium | |
beta_ori_ml_u_sd = .5, # sd of the by-subject random slopes of interaction between orientation and (medium-low) | |
beta_ori_hm_u_sd = .5, # sd of the by-subject random slopes of interaction between orientation and (high-medium) | |
rho_u = 0.4, # correlations between the by-subject random effects | |
# by-item random effects | |
alpha_w_sd = 1, # sd of the by-item random intercepts of orientation | |
beta_ori_w_sd = .3, # sd of the by-item random slopes of orientation | |
beta_ml_w_sd = .3, # sd of the by-item random slopes of medium - low | |
beta_hm_w_sd = .3, # sd of the by-item random slopes of high - medium | |
beta_ori_ml_w_sd = .3, # sd of the by-item random slopes of interaction between orientation and (medium-low) | |
beta_ori_hm_w_sd = .3, # sd of the by-item random slopes of interaction between orientation and (high-medium) | |
rho_w = 0.3 # correlations between the by-item random effects | |
) | |
# update the default parameters with the input | |
for (temp_name in names(params)) { | |
P[[temp_name]] <- params[[temp_name]] | |
} | |
# summary of the true values | |
fixed_true <- c(P$alpha, P$beta_ori, P$beta_ml, | |
P$beta_hm, P$beta_ori_ml, P$beta_ori_hm) | |
u_sd_true <- c(P$alpha_u_sd, P$beta_ori_u_sd, P$beta_ml_u_sd, | |
P$beta_hm_u_sd, P$beta_ori_ml_u_sd, P$beta_ori_hm_u_sd) | |
w_sd_true <- c(P$alpha_w_sd, P$beta_ori_w_sd, P$beta_ml_w_sd, | |
P$beta_hm_w_sd, P$beta_ori_ml_w_sd, P$beta_ori_hm_w_sd) | |
############ the fixed effects ############ | |
nlevel_ori <- length(P$IV1) | |
nlevel_sf <- length(P$IV2) | |
N_cond <- nlevel_ori * nlevel_sf | |
N_trial <- N_cond * P$N_subj * P$N_item | |
# Create a experiment condition tibble | |
df_cond <- tibble( | |
subject = rep(rep(1:P$N_subj, each = P$N_item), each = N_cond), | |
stimulus = rep(rep(1:P$N_item, times = P$N_subj), each = N_cond), | |
Orientation = as_factor(rep(rep(P$IV1, each = nlevel_sf), times = P$N_subj * P$N_item)), | |
SF = as_factor(rep(P$IV2, times = P$N_subj * P$N_item * nlevel_ori)) | |
) | |
# set back difference coding for the independent variables | |
contrasts(df_cond$Orientation) <- MASS::contr.sdif(nlevel_ori) | |
contrasts(df_cond$SF) <- MASS::contr.sdif(nlevel_sf) | |
# Create the design matrix (including the interaction) | |
df_simu_design <- model.matrix( ~ 1 + Orientation * SF, df_cond) | |
# Simulating the fixed effects | |
dv_fixed <- df_simu_design %*% fixed_true | |
############ the random effects for subjects ############ | |
# random effects for subject | |
N_u_sd <- length(u_sd_true) | |
# correlation matrix | |
u_corr <- (1- diag(N_u_sd)) * P$rho_u + diag(N_u_sd) | |
# Cholesky factor | |
L_u <- chol(u_corr) | |
# # We can verify that we recover rho_u with: | |
# t(L_u) %*% L_u | |
# simulate random effects for subjects | |
# uncorrelated z values from the standard normal distribution for all random effects | |
z_u <- replicate(N_u_sd, rnorm(P$N_subj, 0, 1)) | |
# Variance matrix | |
u_var <- diag(N_u_sd) * u_sd_true | |
# random effects of subjects | |
u <- u_var %*% t(L_u) %*% t(z_u) | |
random_u <- t(u)[df_cond$subject, ] | |
# random effects of subjects for each trial | |
dv_u <- rowSums(df_simu_design * random_u) | |
############ the random effects for items ############ | |
# random effects for item | |
N_w_sd <- length(w_sd_true) | |
# correlation matrix | |
w_corr <- (1- diag(N_w_sd)) * P$rho_w + diag(N_w_sd) | |
# Cholesky factor | |
L_w <- chol(w_corr) | |
# # We verify that we recover rho_w, | |
# t(L_w) %*% L_w | |
# simulate random effects for subjects | |
# uncorrelated z values from the standard normal distribution for all random effects | |
z_w <- replicate(N_w_sd, rnorm(P$N_subj, 0, 1)) | |
# Variance matrix | |
w_var <- diag(N_w_sd) * w_sd_true | |
# random effects of items | |
w <- w_var %*% t(L_w) %*% t(z_w) | |
random_w <- t(w)[df_cond$stimulus, ] | |
# random effects of items for each trial | |
dv_w <- rowSums(df_simu_design * random_w) | |
############ generate the dependent variables ############ | |
# combine fixed, random effects and the sigma (residuals) | |
df_simu <- df_cond %>% | |
mutate(erp = dv_fixed[, 1] + dv_u + dv_w + rnorm(n(), 0, P$sigma)) | |
############ return the output ############ | |
contr_matrix <- df_simu_design %>% unique() %>% as.matrix() | |
population_true <- contr_matrix %*% as.matrix(fixed_true) %>% as.vector() | |
names(population_true) <- paste(rep(substr(P$IV1, 1, 3), each = 3), | |
rep(substr(P$IV2, 1, 3), times = 2), | |
sep = "_") | |
return(list(simudata = df_simu, | |
population = population_true, | |
params = P)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment