Skip to content

Instantly share code, notes, and snippets.

@SachaEpskamp
Last active March 23, 2021 18:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SachaEpskamp/9e9aee4be51dcfa425f70d9b608d55d0 to your computer and use it in GitHub Desktop.
Save SachaEpskamp/9e9aee4be51dcfa425f70d9b608d55d0 to your computer and use it in GitHub Desktop.
# This script contains some functions to automate things:
cat("THIS FUNCTION IS OUTDATED, PLEASE SEE https://github.com/SachaEpskamp/RIVMgrowth")
# Function to automize dummy encoding:
rivm_lgc_dummy <- function(
data, # Dataset
design, # Design matrix, as in psychonetrics
type = c("non-linear","linear"), # Type of analysis to do
predictor_var, # Variable name of predictor (only one supported at the moment)
time # time vector encoding distances in time
){
library("dplyr")
# Check data:
stopifnot(is.data.frame(data))
# Check design matrix:
allVars <- unlist(design)
allVars <- allVars[!is.na(allVars)]
if (!all(allVars %in% names(data))){
stop("Not all names in the design matrix correspond to variables in the dataset.")
}
# Extract from design:
if (!is.matrix(design)){
design <- t(design)
}
nVar <- nrow(design)
nWave <- ncol(design)
# Names of design:
if (is.null(rownames(design))){
rownames(design) <- paste0("Var_",seq_len(nVar))
}
variables <- rownames(design)
# Time vector:
if (missing(time)){
time <- seq_len(nWave)
}
# Type of analysis:
type <- match.arg(type)
# Check if predictor is present:
use_predictor <- !missing(predictor_var)
# Setup dummy:
if (use_predictor){
# Check data:
if (!predictor_var %in% names(data)){
stop("predictor_var does not correspond to a variable in the dataset")
}
# Make factor if needed:
if (!is.factor(data[[predictor_var]])){
data[[predictor_var]] <- as.factor(data[[predictor_var]])
}
# Make dummy variables:
predictor_levels <- levels(data[[predictor_var]])
nlevels_predictor <- length(predictor_levels)
nDummy <- nlevels_predictor-1
# Dummy coding:
dummyvars <- paste0("DUMMY_",seq_len(nDummy))
for (d in seq_len(nDummy)){
data[[dummyvars[[d]]]] <- 1* (data[[predictor_var]] == predictor_levels[d+1])
}
} else {
nDummy <- 0
dummyvars <- character(0)
}
# Names of parameters needed in the model:
intercepts <- paste0("int_",variables)
slopes <- paste0("slope_",variables)
residual <- paste0("resid_",variables)
# Slope parameters:
if (type == "non-linear"){
slope_pars <- lapply(variables,function(x){
c(0,1,paste0("slope_",x,"_",3:nWave))
})
names(slope_pars) <- variables
} else {
slope_pars <- lapply(variables,function(x){
time - time[1]
})
names(slope_pars) <- variables
}
# Intercept mean parameters:
int_mean_pars <- lapply(variables,function(x){
paste0("intercept_model_",x,"_",seq_len(nDummy+1)-1)
})
names(int_mean_pars) <- variables
# Slope mean parameters:
slope_mean_pars <- lapply(variables,function(x){
paste0("slope_model_",x,"_",seq_len(nDummy+1)-1)
})
names(slope_mean_pars) <- variables
# Start forming the model:
mod <- character(0)
cur_line <- 0
# loop over variables:
for (v in seq_len(nVar)){
# Variables from the design matrix:
curvars <- design[v,]
# Which are not NA?
incl <- !is.na(curvars)
inclvars <- which(incl)
# Cut out NA:
curvars <- curvars[!is.na(curvars)]
# Update the line:
cur_line <- cur_line + 2
# Header:
mod[cur_line] <- paste0("# Intercepts for variable ",variables[v])
# Update the line:
cur_line <- cur_line + 1
# Write intercepts:
mod[cur_line] <- paste0(intercepts[v], " =~ ", paste0("1 * ",curvars, collapse = " + "))
# Update the line:
cur_line <- cur_line + 1
# Now model the intercept:
mod[cur_line] <- paste0(intercepts[v], " ~ ", paste0(int_mean_pars[[v]],"*",c("1", dummyvars), collapse = " + "))
# Update the line:
cur_line <- cur_line + 2
# Header:
mod[cur_line] <- paste0("# Slopes for variable ",variables[v])
# Update the line:
cur_line <- cur_line + 1
# Write slopes:
mod[cur_line] <- paste0(slopes[v], " =~ ", paste0(slope_pars[[v]][incl]," * ",curvars, collapse = " + "))
# Update the line:
cur_line <- cur_line + 1
# Now model the slopes:
mod[cur_line] <- paste0(slopes[v], " ~ ", paste0(slope_mean_pars[[v]],"*",c("1", dummyvars), collapse = " + "))
# Update the line:
cur_line <- cur_line + 2
# Header:
mod[cur_line] <- paste0("# Residuals for variable ",variables[v])
# Write residuals:
for (i in seq_along(curvars)){
# Update the line:
cur_line <- cur_line + 1
mod[cur_line] <- paste0(curvars[i], " ~~ ", residual[v]," * ",curvars[i])
}
}
# Now add all expected values of all groups at all waves for all variables:
if (use_predictor){
ev_df <- expand.grid(
predictor = seq_along(predictor_levels),
wave = seq_len(nWave),
var = variables,
stringsAsFactors = FALSE
)
names(ev_df) <- c("predictor","wave","var")
# Add dummies:
for (d in seq_len(nDummy)){
ev_df[[dummyvars[[d]]]] <- 1* (predictor_levels[ev_df$predictor] == predictor_levels[d+1])
}
# Parameter name:
ev_df$par <- paste0(ev_df$var,"_wave",ev_df$wave,"_",ev_df$predictor)
} else {
ev_df <- expand.grid(
wave = seq_len(nWave),
var = variables,
stringsAsFactors = FALSE
)
# Parameter name:
ev_df$par <- paste0(ev_df$var,"_wave",ev_df$wave)
}
# Update the line:
cur_line <- cur_line + 2
# Header:
mod[cur_line] <- "# Dummy encoding"
# Loop over these to add:
for (i in seq_len(nrow(ev_df))){
var <- ev_df$var[i]
wave <- ev_df$wave[i]
dummies <- unlist(ev_df[i,dummyvars])
# Current parameter:
curpar <- ev_df$par[i]
# Update the line:
cur_line <- cur_line + 1
# Write the line:
mod[cur_line] <- paste0(
curpar, " := ", paste0(
int_mean_pars[[var]], "*", c("1",dummies),
collapse = " + "
), " + ",
paste0(
slope_mean_pars[[var]], "*", c("1",dummies), " * ", slope_pars[[v]][wave],
collapse = " + "
)
)
}
# Fix empty lines:
mod[is.na(mod)] <- ""
# Aggregate:
model <- paste0(mod, collapse = "\n")
# Run lavaan model:
fit <- growth(model, data, missing = "fiml")
# Connect the parameter estimates to the expected value table:
parTable <- parameterEstimates(fit, standardized = TRUE)
# Join with the expected values:
ev_df <- ev_df %>% left_join(parTable, by = c("par" = "label"))
# Add time:
ev_df$time_of_wave <- time[ev_df$wave]
# Fix predictor:
if (use_predictor){
ev_df$predictor <- predictor_levels[ev_df$predictor]
}
# Select columns:
if (use_predictor){
ev_df <- ev_df[,c("var","wave","time_of_wave","predictor","est","se","ci.lower","ci.upper")]
} else {
ev_df <- ev_df[,c("var","wave","time_of_wave","est","se","ci.lower","ci.upper")]
}
# Significance of the regressions:
reg_df <- parTable %>% filter(op %in% c("~1", "~"), lhs %in% c(intercepts, slopes)) %>%
select(-label)
# Correlations between slopes:
cor_df <- parTable %>% filter(op == "~~", lhs %in% c(intercepts, slopes), rhs %in% c(intercepts, slopes)) %>%
select(lhs, op, rhs, est, se, z, pvalue, ci.lower, ci.upper, std.all)
# Make list:
results <- list(
lavresult = fit,
expected_values = ev_df,
regressions = reg_df,
correlations = cor_df,
time = time,
use_predictor = use_predictor
)
class(results) <- "rivm_lgc_dummy"
# Return:
return(results)
}
# Print method:
print.rivm_lgc_dummy <- function(x){
cat("=== RIVM Latent Growth Curve Analysis ===","\n\n")
cat("Lavaan fit result: \n")
print(x$lavresult)
cat("\n\nLavaan fit measures: \n")
fit <- round(fitMeasures(x$lavresult)[c("chisq", "df", "pvalue", "cfi", "tli", "nnfi", "rfi",
"nfi", "pnfi", "ifi", "rni", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
"rmsea.pvalue", "srmr", "gfi", "agfi")], 2)
df <- data.frame(
index = names(fit),
value = fit
)
rownames(df) <- NULL
print(df, digits = 2)
cat("\n\nRegression coefficients: \n")
print(x$regressions, digits = 2)
cat("\n\n(co)variance and correlation coefficients: \n")
print(x$correlations, digits = 2)
cat("\n\nExpected values: \n")
print(x$expected_values, digits = 2)
cat("\n\n=========================================")
}
# Plot method:
plot.rivm_lgc_dummy <- function(x, predictor_label = "Predictor", wave_label = "Wave "){
library("ggplot2")
if (x$use_predictor){
p <- ggplot(x$expected_values,
# With aesthetics:
aes(
x = time_of_wave, # This is the variable that enocdes the time of the wave.
y = est, # The estimate
ymin = ci.lower, # If we want CIs
ymax = ci.upper, # If we want CIs
colour = predictor,
fill = predictor
)) +
# Nicer legend (and colorblind theme):
scale_colour_manual(predictor_label,
values = qgraph:::colorblind(length(unique(x$expected_values$predictor))))
} else {
p <- ggplot(x$expected_values,
# With aesthetics:
aes(
x = time_of_wave, # This is the variable that enocdes the time of the wave.
y = est, # The estimate
ymin = ci.lower, # If we want CIs
ymax = ci.upper # If we want CIs
))
}
p <- p +
# Seperate plot per variable:
facet_grid(var ~ ., scales = "free_y") +
# Add confidence region (uncomment to remove):
geom_ribbon(alpha = 0.1, show.legend = FALSE, colour = NA) +
# Add lines:
geom_line(lwd=1.2) +
# Add points:
geom_point(cex = 3, pch = 21, fill = "white", stroke = 1.5) +
# Nicer theme:
theme_bw() +
# Set x axis scale to have waves instead of numbers:
scale_x_continuous(breaks = sort(unique(x$expected_values$time_of_wave)),
labels = paste0(wave_label," ",sort(unique(x$expected_values$wave)))) +
# No labels:
xlab("") +
ylab("") +
# Larger fonts:
theme(text = element_text(size = 20), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
return(p)
}
# Correlation plot:
corplot <- function(x, legend.position = "topright"){
library("dplyr")
library("qgraph")
cors <- x$correlations
cors <- cors %>% filter(lhs != rhs) %>% select(
lhs = lhs, rhs = rhs, weight = std.all, pvalue = pvalue
) %>%
mutate(
lhs = gsub("\\_","\n",lhs),
rhs = gsub("\\_","\n",rhs)
) %>% mutate(
lhs = gsub("int\n","intercept\n",lhs),
rhs = gsub("int\n","intercept\n",rhs)
)
cors$lty <- 3
cors$lty[cors$pvalue < 0.05] <- 2
cors$lty[cors$pvalue < 0.01] <- 1
qgraph(cors[,1:3], directed = FALSE, theme = "colorblind",
edge.labels = TRUE, lty = cors$lty, vsize = 14)
legend(legend.position, lty = c(1,2,3),legend = c("p < 0.01","p < 0.05","p > 0.05"),
bty = "n", cex = 1.5, lwd = 3)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment