Skip to content

Instantly share code, notes, and snippets.

@mja
Forked from johnmyleswhite/gist:4596783
Last active December 11, 2015 12:08
Show Gist options
  • Save mja/4598584 to your computer and use it in GitHub Desktop.
Save mja/4598584 to your computer and use it in GitHub Desktop.
library("ggplot2")
library(plyr)
n.sims <- 100
max.n.vars <- 100
n.obs <- 100
res <- data.frame()
for (sim in 1:n.sims)
{
for (n.vars in 1:max.n.vars)
{
y <- rnorm(n.obs)
x <- matrix(NA, nrow = n.obs, ncol = n.vars)
for (j in 1:n.vars)
{
x[, j] <- rnorm(n.obs)
}
fit <- lm(y ~ x - 1)
p.vals <- coef(summary(fit))[, 4]
false.positives <- sum(p.vals < 0.05)
res <- rbind(res, data.frame(Sim = sim, Vars = n.vars, FP = false.positives))
}
}
ggplot(res, aes(x = Vars, y = FP)) +
geom_smooth() +
xlab("Number of Variables in Multiple Regression") +
ylab("Average Number of Variables for which p < 0.05") +
ggtitle("Number of False Positives Grows Linearly with Number of Variables")
ggsave("false_positives.pdf")
# False positive rate per number of variables
Rate <- ddply(res, .(Vars), function(x) data.frame(rate=sum(x$FP) / sum(x$Vars)))
ggplot(Rate, aes(x=Vars, y=rate)) +
geom_point() +
stat_smooth(method=lm) +
xlab("Number of Variables in Multiple Regression") +
ylab("False positive rate") +
ggtitle("Rate of False Positives is Constant")
# Family-wise error rate = % of times we get any false positives for a
# given number of predictors
FWER <- ddply(res, .(Vars), function(x) data.frame(fwer=mean(x$FP > 0)))
ggplot(FWER, aes(x=Vars, y=fwer)) +
geom_point() +
stat_smooth() +
scale_y_continuous(limits=c(0, 1)) +
xlab("Number of Variables in Multiple Regression") +
ylab("Familywise error rate") +
ggtitle("Familywise error rate increases until parameters saturate the data")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment