Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created November 27, 2023 21:42
Show Gist options
  • Save sashagusev/a14ec545dd2d2aeb5482e451227a4468 to your computer and use it in GitHub Desktop.
Save sashagusev/a14ec545dd2d2aeb5482e451227a4468 to your computer and use it in GitHub Desktop.
# simple factor analysis example
# ---
# lavaan for factor analysis
library('lavaan')
# --- parameters
# number of people
N = 1e3
# number of factors
M = 100
# number of tests
M_tests = 8
# fraction of factors going into each test
p = 0.5
# measurement error
# error SD for each test will be sampled from between these values
min_err = 0.5
max_err = 1.5
# --- parameters
# 100 different random factors
latent = matrix(runif(N*M),nrow=N,ncol=M)
pdf("g_latent.pdf",height=4,width=5)
pheatmap(latent,cluster_rows=F,cluster_cols=F,treeheight_row=0,treeheight_col=0,border=NA,col = colorRampPalette(c("white", "black"))(50))
dev.off()
# 8 tests that sample from each of them
tests = matrix(NA,nrow=N,ncol=M_tests)
colnames(tests) = paste("T",1:ncol(tests),sep='')
shared = rnorm(N,0,0.1)
test_map = matrix(0,nrow=M,ncol=M_tests)
for ( i in 1:M_tests ) {
# sample factors going into this test
t = sample(1:M,M*p)
# record the mapping
test_map[t,i] = 1
# sample some measurement error for this test
noise_var = runif(1,min_err,max_err)
tests[,i] = scale(apply(latent[,t],1,mean)) + rnorm(N,0,noise_var)
}
pdf("g_tests.pdf",height=4,width=5)
pheatmap(test_map,xlab="Tests",ylab="Latent Factors",border="black",cluster_rows=T,cluster_cols=T,treeheight_row=0,treeheight_col=0,col = colorRampPalette(c("white", "black"))(50))
dev.off()
# positive correlation matrix
pdf("g_test_cor.pdf",height=4,width=5)
pheatmap(cor(tests)[,rev(1:M_tests)],display_numbers=T,number_format = "%.2f",cluster_rows=F,cluster_cols=F,treeheight_row=0,treeheight_col=0,col = colorRampPalette(c("white", "firebrick3"))(50))
dev.off()
# variance explained
pca = prcomp(tests,center=T,scale=T)
varexp = pca$sdev^2/sum(pca$sdev^2)
names(varexp) = 1:M_tests
pdf("g_var_exp.pdf",height=4,width=5)
barplot(varexp,border=NA,col="black",las=1,xlab="Factor",ylab="Variance Explained")
dev.off()
# CFA
model <- 'G =~ NA*T1+T2+T3+T4+T5+T6+T7+T8
G ~~ 1*G'
fit <- cfa(model, data=scale(tests), meanstructure=T, missing="fiml")
print(summary(fit, rsquare=T, fit.measures=F))
cat( "CFA metrics:\n" )
print(fitMeasures(fit, c("cfi","tli","rmsea","srmr") ))
cat( "Variance explained:" , varexp , '\n' )
# loadings
library('lavaanPlot')
pl = lavaanPlot(model=fit,coefs = TRUE,edge_options = list(color = "grey"))
embed_plot_pdf(pl, "~/Dropbox (Partners HealthCare)/TWT/g_factor_graph.pdf")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment