Skip to content

Instantly share code, notes, and snippets.

@stonegold546
Last active August 10, 2019 16:34
Show Gist options
  • Save stonegold546/d95c36da8f41c6113a1c812276facaec to your computer and use it in GitHub Desktop.
Save stonegold546/d95c36da8f41c6113a1c812276facaec to your computer and use it in GitHub Desktop.
Sum or factor score
library(lavaan)
library(psych)
sim.fun <- function (lv, lambda, nrep = 2e3) {
np <- nrow(lv)
t(replicate(nrep, {
X <- lv %*% lambda +
matrix(rnorm(np * length(lambda), 0, sqrt(1 - lambda ^ 2)), np, byrow = TRUE)
# summary(cfa(paste("F =~", paste0("V", 1:length(lambda), collapse = " + ")), X, std.lv = TRUE))
ss <- rowSums(X)
ss.fit <- unname(fitmeasures(
cfa(paste(paste("F =~ a *", paste0("V", 1:length(lambda), collapse = " + a * ")),
paste0("V", 1:length(lambda), " ~~ b * V", 1:length(lambda), collapse = "\n"),
sep = "\n"), X),
c("chisq", "df", "pvalue")))
fs <- predict(cfa(paste("F =~", paste0("V", 1:length(lambda), collapse = " + ")), X))
c(alpha = psych::alpha(X)$total$raw_alpha, ss = cor(lv, ss), fs = cor(lv, fs),
chisq = ss.fit[1], df = ss.fit[2], pvalue = ss.fit[3])
}))
}
# c(.7, .8, .6, .7, .75, .5, .85, .55, .65)
np <- 6e1
lat <- matrix(qnorm((1:np - .5) / np))
(loadings.1 <- seq(.9, .6, length.out = 9)) ^ 2
res.1 <- sim.fun(lat, loadings.1)
colMeans(res.1)
mean(res.1[, "pvalue"] < .05)
apply(res.1[, 1:3], 2, function (x) mean((x - 1) ^ 2))
(loadings.2 <- seq(.9, .6, length.out = 3)) ^ 2
res.2 <- sim.fun(lat, loadings.2)
colMeans(res.2)
mean(res.2[, "pvalue"] < .05)
apply(res.2[, 1:3], 2, function (x) mean((x - 1) ^ 2))
(loadings.3 <- seq(.5, .3, length.out = 9)) ^ 2
res.3 <- sim.fun(lat, loadings.3)
colMeans(res.3)
mean(res.3[, "pvalue"] < .05)
apply(res.3[, 1:3], 2, function (x) mean((x - 1) ^ 2))
(loadings.4 <- seq(.5, .3, length.out = 3)) ^ 2
res.4 <- sim.fun(lat, loadings.4)
colMeans(res.4)
mean(res.4[, "pvalue"] < .05)
apply(res.4[, 1:3], 2, function (x) mean((x - 1) ^ 2))
(loadings.5 <- seq(.9, .3, length.out = 9)) ^ 2
res.5 <- sim.fun(lat, loadings.5)
colMeans(res.5)
mean(res.5[, "pvalue"] < .05)
apply(res.5[, 1:3], 2, function (x) mean((x - 1) ^ 2))
library(dplyr)
library(ggplot2)
library(scales)
library(directlabels)
theme_set(theme_classic())
res.all <- bind_rows(lapply(list(res.1, res.2, res.3, res.4, res.5), as.data.frame), .id = "design") %>%
mutate(design = case_when(
design == 1 ~ "9 items strong",
design == 2 ~ "3 items strong",
design == 3 ~ "9 items weak",
design == 4 ~ "3 items weak",
design == 5 ~ "9 items hetero"
), design = factor(design, levels = c("3 items weak", "9 items weak", "3 items strong",
"9 items strong", "9 items hetero")))
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
res.all %>%
reshape(direction = "long", varying = 3:4, times = c("ss", "fs"), v.names = "correlation") %>%
ggplot(aes(abs(correlation), fill = time)) + geom_density(alpha = .5) +
geom_rug(aes(col = time), alpha = .25) +
facet_wrap(~ design, scales = "free") +
scale_fill_manual(values = cbbPalette, labels = c("Factor score", "Sum score")) +
scale_color_manual(values = cbbPalette, labels = c("Factor score", "Sum score")) +
theme(legend.position = c(.8, .3)) +
labs(x = "", fill = "Approach", col = "Approach",
subtitle = "Distribution of correlation to latent variable",
caption = "The axes are different for each panel")
ggsave("cor.pdf", height = 5, width = 7)
ggsave("cor.png", height = 5, width = 7)
res.all %>%
group_by(design) %>% mutate(power = mean(pvalue < .05)) %>%
ggplot(aes(x = design, y = power)) +
geom_segment(aes(xend = design, y = 0, yend = power), alpha = .00625) +
geom_rug(aes(y = pvalue), x = NA, length = unit(.25, "npc"), alpha = .0625) +
facet_wrap(~ reorder(design, -power), ncol = 1, scales = "free_y") +
geom_point(shape = 1, alpha = .00625) + coord_flip() +
geom_text(aes(label = percent(power, .1)), vjust = -1, alpha = .00625) +
scale_y_continuous(labels = percent_format()) +
labs(y = "Statistical power to detect misspecification of sum score model; alpha = .05", x = "",
caption = "Vertical strips represent distribution of p-values") +
theme(axis.line.y = element_blank(), axis.ticks.y = element_blank(),
strip.background = element_blank(), strip.text = element_blank())
ggsave("power_misspec.pdf", height = 4, width = 7)
ggsave("power_misspec.png", height = 4, width = 7)
(tmp <- res.all %>%
group_by(design) %>%
summarise(alpha = mean(alpha), ss = mean((ss - 1) ^ 2),
fs = mean((abs(fs) - 1) ^ 2),
ratio = number(fs / ss, .01)))
tmp %>%
ggplot(aes(ss, fs)) + geom_point(aes(size = ratio), shape = 1) + coord_fixed() +
geom_dl(aes(label = design), method = "smart.grid") +
scale_y_continuous(trans = log_trans(), breaks = round(tmp$fs, 4)) +
scale_x_continuous(trans = log_trans(), breaks = round(tmp$ss, 4)) +
scale_size_discrete(guide = guide_legend(reverse = TRUE)) +
theme(legend.box.just = 0:1, legend.position = c(.25, .75)) +
labs(x = "Sum MSE", y = "Factor MSE", size = "Factor MSE / Sum MSE",
subtitle = "Mean squared error of correlation to latent variable",
caption = "Axes are on natural log scale")
ggsave("mse.pdf", height = 5.5, width = 5)
ggsave("mse.png", height = 5.5, width = 5)
lmat <- data.frame(loadings = c(loadings.1, loadings.2, loadings.3, loadings.4, loadings.5),
design = c(rep("9 items strong", 9), rep("3 items strong", 3), rep("9 items weak", 9),
rep("3 items weak", 3), rep("9 items hetero", 9))) %>%
mutate(design = factor(design, levels = rev(c("9 items strong", "3 items strong", "9 items weak",
"3 items weak", "9 items hetero"))))
lmat %>%
ggplot(aes(design, loadings)) + geom_point(shape = 1) + coord_flip() +
scale_y_continuous(breaks = seq(.3, .9, .05),
sec.axis = sec_axis(trans = ~ . ^ 2, breaks = seq(.3, .9, .075) ^ 2,
labels = percent_format(), name = "R-square")) +
labs(x = "Loading set", y = "Loadings")
ggsave("loadings.pdf", height = 3, width = 7)
ggsave("loadings.png", height = 3, width = 7)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment