Skip to content

Instantly share code, notes, and snippets.

@zaltoprofen
Last active May 14, 2016 12:34
Show Gist options
  • Save zaltoprofen/7301af8d1917d993f370e53c6bec2d64 to your computer and use it in GitHub Desktop.
Save zaltoprofen/7301af8d1917d993f370e53c6bec2d64 to your computer and use it in GitHub Desktop.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.pipeline import Pipeline
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
modelF = lambda deg: Pipeline([
('poly', PolynomialFeatures(degree=deg)),
('linear', LinearRegression(fit_intercept=False))])
p = 11
beta = np.random.normal(0, 10, p)
def f(x):
return sum(beta[i]*x**i for i in range(p))
X = (np.random.rand(30) * 10 - 5)
Y = f(X) + np.random.normal(0, 5, size=X.shape)
poor_model = modelF(7)
rich_model = modelF(8)
poor_model = poor_model.fit(X[:, np.newaxis], Y)
rich_model = rich_model.fit(X[:, np.newaxis], Y)
def plot_model(model, name):
X = np.arange(-5, 5, 0.01)
eY = model.predict(X[:, np.newaxis])
plt.plot(X, eY, '-', label=name)
def likelihood(model, X, y):
ey = model.predict(X[:, np.newaxis])
rss = np.linalg.norm(y - ey)**2
n = y.size
return -n/2 * (np.log(2*np.pi) + 1 + np.log(rss/n))
def parametric_bootstrap(X, y, model1, model2, n=1000):
def get_dd(X, y, model1, model2, sigma2):
new_y = ey + np.random.normal(0, np.sqrt(sigma2), y.size)
model1 = model1.fit(X[:, np.newaxis], new_y)
model2 = model2.fit(X[:, np.newaxis], new_y)
return -2 * (likelihood(model1, X, new_y) - likelihood(model2, X, new_y))
model1 = model1.fit(X[:, np.newaxis], y)
ey = model1.predict(X[:, np.newaxis])
rss = np.linalg.norm(y - ey)**2
sigma2 = rss/y.size
return np.array([get_dd(X, y, model1, model2, sigma2) for k in range(n)])
L1 = likelihood(poor_model, X, Y)
print('poor model log-likelihood: %f' % L1)
L2 = likelihood(rich_model, X, Y)
print('rich model log-likelihood: %f' % L2)
DD12 = -2 * (L1 - L2)
print('Delta D(1,2): %f' % DD12)
samples_pb = 2000
dd12 = parametric_bootstrap(X, Y, poor_model, rich_model, samples_pb)
larger_than_DD12 = sum(dd12 >= DD12)
print('dd12 >= %f: %d' % (DD12, larger_than_DD12))
print('p(dd12 >= %f)=%f' % (DD12, float(larger_than_DD12)/samples_pb))
print('p(dd12\' >= %f)=%f' % (DD12, scipy.stats.chi2.sf(DD12, 1)))
plt.hist(dd12, bins=30, normed=True)
plt.savefig('plot.png')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment