barusan/gaussian_mle.r

## gaussian_mle.r
#!/usr/local/bin/Rscript
# run on Mac OS X, homebrew R

library(ggplot2)

# sample sizes
sizes <- c(2, 5, 10, 20, 50, 100, 200, 500, 1000)

# return (x - 1) / x
ratio <- function(x) { (x - 1) / x }

# sample variance
variance <- function(x) { var(x) * ratio(length(x)) }

# data generation: dataSize samples x N trials
N <- 1000
genData <- function(dataSize) {
	lapply(1:N, function(x) { rnorm(dataSize, mean=0.0, sd=1.0) })
}

# trial & evaluation
data <- lapply(sizes, genData)
sampleVars <- lapply(data, function(x) { sapply(x, variance) })
unbiasVars <- lapply(data, function(x) { sapply(x, var) })
sampleVarsMeans <- sapply(sampleVars, mean)
unbiasVarsMeans <- sapply(unbiasVars, mean)

# plot
quartz()
plot(sizes, sampleVarsMeans, log="x", ylim=c(0, 1.5),
     col="blue", type="p", pch=15,
     xlab="sample size",
     ylab=sprintf("average of estimated variance over %d trials", N))
points(sizes, unbiasVarsMeans,
     col="red", type="p", pch=16)
abline(1, 0, col="green4")  # ground truth
lines(1:2000, sapply(1:2000, ratio), col="yellow4", lty=2)  # (N-1)/N
legend("topright",
       c("sample variance", "unbiased variance",
         "ground truth", "(N - 1) / N"),
       col=c("blue","red","green4","yellow4"),
       lty=c(NA,NA,1,2), lwd=c(NA,NA,1,1), pch=c(15,16,NA,NA))
locator()  # do not close the window automatically
dev.off()
	#!/usr/local/bin/Rscript
	# run on Mac OS X, homebrew R

	library(ggplot2)

	# sample sizes
	sizes <- c(2, 5, 10, 20, 50, 100, 200, 500, 1000)

	# return (x - 1) / x
	ratio <- function(x) { (x - 1) / x }

	# sample variance
	variance <- function(x) { var(x) * ratio(length(x)) }

	# data generation: dataSize samples x N trials
	N <- 1000
	genData <- function(dataSize) {
	lapply(1:N, function(x) { rnorm(dataSize, mean=0.0, sd=1.0) })
	}

	# trial & evaluation
	data <- lapply(sizes, genData)
	sampleVars <- lapply(data, function(x) { sapply(x, variance) })
	unbiasVars <- lapply(data, function(x) { sapply(x, var) })
	sampleVarsMeans <- sapply(sampleVars, mean)
	unbiasVarsMeans <- sapply(unbiasVars, mean)

	# plot
	quartz()
	plot(sizes, sampleVarsMeans, log="x", ylim=c(0, 1.5),
	col="blue", type="p", pch=15,
	xlab="sample size",
	ylab=sprintf("average of estimated variance over %d trials", N))
	points(sizes, unbiasVarsMeans,
	col="red", type="p", pch=16)
	abline(1, 0, col="green4") # ground truth
	lines(1:2000, sapply(1:2000, ratio), col="yellow4", lty=2) # (N-1)/N
	legend("topright",
	c("sample variance", "unbiased variance",
	"ground truth", "(N - 1) / N"),
	col=c("blue","red","green4","yellow4"),
	lty=c(NA,NA,1,2), lwd=c(NA,NA,1,1), pch=c(15,16,NA,NA))
	locator() # do not close the window automatically
	dev.off()