Skip to content

Instantly share code, notes, and snippets.

@barusan
Last active April 20, 2016 10:14
Show Gist options
  • Save barusan/4b799594a510fbf9550b052421d524b7 to your computer and use it in GitHub Desktop.
Save barusan/4b799594a510fbf9550b052421d524b7 to your computer and use it in GitHub Desktop.
experiment of unbiased variance over normally-distributed data
#!/usr/bin/env python2
#-*- coding: utf-8 -*-
import math
import numpy as np
import matplotlib.pyplot as plt
MEAN = 0.0
VARIANCE = 1.0
NTRIAL = 1000
Ns = [2, 5, 10, 20, 50, 100, 200, 500, 1000]
sample_vars = []
unbiased_vars = []
for N in Ns:
svars = []
uvars = []
for trial in range(NTRIAL):
data = np.random.normal(MEAN, math.sqrt(VARIANCE), N)
svar = np.var(data, ddof=0) # sample variance
uvar = np.var(data, ddof=1) # unbiased variance
svars.append(svar)
uvars.append(uvar)
sample_vars.append(np.mean(svars))
unbiased_vars.append(np.mean(uvars))
plt.plot([0, 2000], [1.0, 1.0], "g-", label="ground truth")
X = np.arange(1, 2000, 1)
plt.plot(X, map(lambda x: float(x-1)/float(x), X), "y--", label="$(N-1)/N$")
plt.plot(Ns, sample_vars, "bs", ms=7, label="sample variance")
plt.plot(Ns, unbiased_vars, "ro", ms=7, label="unbiased variance")
plt.xscale("log")
plt.xlim(xmax=2000)
plt.ylim(ymax=1.5)
plt.xlabel("sample size")
plt.ylabel("average of estimated variance over %d trials" % NTRIAL)
plt.legend()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment