Skip to content

Instantly share code, notes, and snippets.

@Quantisan
Last active December 10, 2015 22:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Quantisan/4502739 to your computer and use it in GitHub Desktop.
Save Quantisan/4502739 to your computer and use it in GitHub Desktop.
illustrating statistical type I and II errors
test.flips <- function(N, A.prop=0.5, B.prop=0.5, attr="p.value") {
heads.A <- rbinom(1, N, A.prop)
heads.B <- rbinom(1, N, B.prop)
test <- prop.test(c(heads.A, heads.B), n=c(N, N), alternative="two.sided")
return(as.numeric(test[attr]))
}
## vary number of flips
N <- seq(1, 1001, by=10)
p.values <- sapply(N, test.flips, A.prop=0.5, B.prop=0.5)
data <- cbind(N, p.values)
require('ggplot2')
p <- ggplot(as.data.frame(data), aes(x=N, y=p.values))
p <- p + geom_line() + geom_abline(intercept = log10(0.05), slope=0, colour="red") + scale_y_log10() + labs(title="Test if two fair coins are different", x="Number of flips")
print(p)
## sample the same number of flips
p.values <- replicate(100, test.flips(1000))
data <- as.data.frame(cbind(row(as.data.frame(p.values)), p.values))
colnames(data) <- c("index", "p.values")
p <- ggplot(data, aes(x=index, y=p.values))
p <- p + geom_line() + geom_abline(intercept = log10(0.05), slope=0, colour="red") + scale_y_log10() + labs(title="Flipping two fair coins 1000 times", x="Observation ID")
print(p)
## Power calculation
power <- power.prop.test(p1=0.5, p2=0.501, power=0.90, alternative="two.sided")
print(power)
## Running the tests at sufficient samples
p.values <- replicate(100, test.flips(round(power$n)))
data <- as.data.frame(cbind(row(as.data.frame(p.values)), p.values))
colnames(data) <- c("index", "p.values")
p <- ggplot(data, aes(x=index, y=p.values))
p <- p + geom_line() + geom_abline(intercept = log10(0.05), slope=0, colour="red") + scale_y_log10() + labs(title=paste("Flipping two fair coins ", round(power$n), " times"), x="Observation ID")
print(p)
## False negative
N <- seq(1, 100001, by=10)
p.values <- sapply(N, test.flips, A.prop=0.5, B.prop=0.51)
data <- cbind(N, p.values)
p <- ggplot(as.data.frame(data), aes(x=N, y=p.values))
p <- p + geom_line() + geom_abline(intercept = log10(0.05), slope=0, colour="red") + scale_y_log10() + labs(title="Test if two marginally different coins (0.5, 0.51) are different", x="Number of flips")
print(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment