johnmyleswhite/gist:4596783

## gistfile1.r
library("ggplot2")

# Simulation: regress a pure-noise outcome on pure-noise predictors and count
# how many coefficients reach p < 0.05 as the number of predictors grows.
# y and x are drawn independently, so every such coefficient is a false
# positive by construction.
n.sims <- 100      # repetitions per predictor count
max.n.vars <- 100  # largest number of predictors tried
n.obs <- 100       # observations per simulated data set

# One row per (simulation, predictor count) pair, in the order the fits run.
# Laying the grid out up front avoids growing a data frame with rbind() inside
# the loop, which re-copies every accumulated row on each iteration.
# seq_len() yields an empty range (instead of c(1, 0)) if a count is ever 0.
sim.grid <- data.frame(
	Sim = rep(seq_len(n.sims), each = max.n.vars),
	Vars = rep(seq_len(max.n.vars), times = n.sims)
)

# Fit one no-intercept regression of noise on `n.vars` noise predictors and
# return how many coefficients have p < 0.05.
count.false.positives <- function(n.vars)
{
	y <- rnorm(n.obs)
	# matrix() fills column by column, so a single rnorm() call consumes the
	# random draws in the same order as filling the columns one at a time.
	x <- matrix(rnorm(n.obs * n.vars), nrow = n.obs, ncol = n.vars)
	fit <- lm(y ~ x - 1)
	# Fourth column of the coefficient table: the p-values.
	p.vals <- coef(summary(fit))[, 4]
	# NOTE(review): when n.vars reaches n.obs the fit has no residual degrees
	# of freedom, so the p-values (and hence this count) are expected to be NA
	# for those rows -- confirm, and decide whether max.n.vars should stay
	# below n.obs.
	sum(p.vals < 0.05)
}

# vapply() walks the grid in row order and guarantees one integer per fit.
res <- cbind(
	sim.grid,
	FP = vapply(sim.grid$Vars, count.false.positives, integer(1))
)

# Smoothed trend of the false-positive count against the number of predictors.
ggplot(res, aes(x = Vars, y = FP)) +
	geom_smooth() +
	xlab("Number of Variables in Multiple Regression") +
	ylab("Average Number of Variables for which p < 0.05") +
	ggtitle("Number of False Positives Grows Linearly with Number of Variables")
# No plot is passed, so ggsave() falls back to ggplot2's last plot.
ggsave("false_positives.pdf")
	library("ggplot2")

	# Simulation: regress a pure-noise outcome on pure-noise predictors and
	# count how many coefficients reach p < 0.05 as the number of predictors
	# grows. y and x are drawn independently, so every such coefficient is a
	# false positive by construction.
	# NOTE(review): this repeats the simulation that appears earlier in the
	# file; confirm whether running it a second time is intentional.
	n.sims <- 100      # repetitions per predictor count
	max.n.vars <- 100  # largest number of predictors tried
	n.obs <- 100       # observations per simulated data set

	# One row per (simulation, predictor count) pair, in the order the fits
	# run. Laying the grid out up front avoids growing a data frame with
	# rbind() inside the loop, which re-copies every accumulated row on each
	# iteration. seq_len() yields an empty range (instead of c(1, 0)) if a
	# count is ever 0.
	sim.grid <- data.frame(
		Sim = rep(seq_len(n.sims), each = max.n.vars),
		Vars = rep(seq_len(max.n.vars), times = n.sims)
	)

	# Fit one no-intercept regression of noise on `n.vars` noise predictors
	# and return how many coefficients have p < 0.05.
	count.false.positives <- function(n.vars)
	{
		y <- rnorm(n.obs)
		# matrix() fills column by column, so a single rnorm() call consumes
		# the random draws in the same order as filling the columns one at a
		# time.
		x <- matrix(rnorm(n.obs * n.vars), nrow = n.obs, ncol = n.vars)
		fit <- lm(y ~ x - 1)
		# Fourth column of the coefficient table: the p-values.
		p.vals <- coef(summary(fit))[, 4]
		# NOTE(review): when n.vars reaches n.obs the fit has no residual
		# degrees of freedom, so the p-values (and hence this count) are
		# expected to be NA for those rows -- confirm, and decide whether
		# max.n.vars should stay below n.obs.
		sum(p.vals < 0.05)
	}

	# vapply() walks the grid in row order and guarantees one integer per fit.
	res <- cbind(
		sim.grid,
		FP = vapply(sim.grid$Vars, count.false.positives, integer(1))
	)

	# Smoothed trend of the false-positive count against the number of
	# predictors.
	ggplot(res, aes(x = Vars, y = FP)) +
		geom_smooth() +
		xlab("Number of Variables in Multiple Regression") +
		ylab("Average Number of Variables for which p < 0.05") +
		ggtitle("Number of False Positives Grows Linearly with Number of Variables")
	# No plot is passed, so ggsave() falls back to ggplot2's last plot.
	ggsave("false_positives.pdf")