Skip to content

Instantly share code, notes, and snippets.

@barusan
Created April 3, 2016 07:58
Show Gist options
  • Save barusan/6d68b0abce47e1b519ab232725d90718 to your computer and use it in GitHub Desktop.
Save barusan/6d68b0abce47e1b519ab232725d90718 to your computer and use it in GitHub Desktop.
experiment of unbiased variance over normally-distributed data
#!/usr/local/bin/Rscript
# run on Mac OS X, homebrew R
library(ggplot2)
# sample sizes
sizes <- c(2, 5, 10, 20, 50, 100, 200, 500, 1000)
# return (x - 1) / x
ratio <- function(x) { (x - 1) / x }
# sample variance
variance <- function(x) { var(x) * ratio(length(x)) }
# data generation: dataSize samples x N trials
N <- 1000
genData <- function(dataSize) {
lapply(1:N, function(x) { rnorm(dataSize, mean=0.0, sd=1.0) })
}
# trial & evaluation
data <- lapply(sizes, genData)
sampleVars <- lapply(data, function(x) { sapply(x, variance) })
unbiasVars <- lapply(data, function(x) { sapply(x, var) })
sampleVarsMeans <- sapply(sampleVars, mean)
unbiasVarsMeans <- sapply(unbiasVars, mean)
# plot
quartz()
plot(sizes, sampleVarsMeans, log="x", ylim=c(0, 1.5),
col="blue", type="p", pch=15,
xlab="sample size",
ylab=sprintf("average of estimated variance over %d trials", N))
points(sizes, unbiasVarsMeans,
col="red", type="p", pch=16)
abline(1, 0, col="green4") # ground truth
lines(1:2000, sapply(1:2000, ratio), col="yellow4", lty=2) # (N-1)/N
legend("topright",
c("sample variance", "unbiased variance",
"ground truth", "(N - 1) / N"),
col=c("blue","red","green4","yellow4"),
lty=c(NA,NA,1,2), lwd=c(NA,NA,1,1), pch=c(15,16,NA,NA))
locator() # do not close the window automatically
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment