@bthirion
Last active December 15, 2015 08:49
Experiment on GCV illustrating a problem with sklearn's GCV-based RidgeCV. The issue arises when p > n: because of centering, the design matrix becomes rank deficient in the scikit-learn implementation, which degrades the numerical behaviour and makes sklearn's alpha selection very poor.
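A quick way to see the rank-deficiency mechanism mentioned above (a minimal sketch, assuming nothing beyond numpy): when p > n the raw design X has full row rank n, but centering its columns removes one degree of freedom, so the centered matrix drops to rank n - 1.

import numpy as np
rng = np.random.RandomState(0)
X = rng.randn(100, 200)            # n = 100 samples, p = 200 features
Xc = X - X.mean(axis=0)            # column centering, as done when fitting an intercept
print(np.linalg.matrix_rank(X))    # 100: full row rank
print(np.linalg.matrix_rank(Xc))   # 99: centering removes one dimension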
import numpy as np
from sklearn import linear_model


def _gcv(X, y, alphas):
    """Local GCV: return the best alpha and the corresponding score."""
    n1, p = X.shape
    # singular values of the design matrix
    _, s, _ = np.linalg.svd(X, 0)
    clf = linear_model.Ridge()
    best_gcv = np.inf
    for alpha in alphas:
        dof = np.sum(s ** 2 / (s ** 2 + alpha))  # ridge degrees of freedom
        clf.alpha = alpha
        clf.fit(X, y)
        # classical GCV formula: SSE / n / (1 - dof / n) ** 2
        # (superseded below by the explicit leave-one-out form)
        sse = (1 - clf.score(X, y)) * np.var(y) * n1
        gcv_ = sse / n1 / (1 - dof / n1) ** 2
        # hat matrix M = X (X'X + alpha I)^-1 X', computed in the primal or
        # dual form depending on whether n1 > p
        if n1 > p:
            M = np.dot(np.dot(
                X, np.linalg.inv(np.dot(X.T, X) + alpha * np.eye(p))), X.T)
        else:
            K = np.dot(X, X.T)
            M = (K - np.dot(np.dot(K, np.linalg.inv(K + np.eye(n1) * alpha)),
                            K)) / alpha
        # leave-one-out residuals from the diagonal of the hat matrix
        gcv_ = np.sum(((y - clf.predict(X)) / (1 - np.diag(M))) ** 2)
        if gcv_ < best_gcv:
            best_gcv = gcv_
            best_alpha = alpha
    return best_alpha, best_gcv
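
# Sanity-check sketch (not part of the original gist): for ridge with a fixed
# penalty and no intercept, the leave-one-out residual of sample i equals
# (y_i - yhat_i) / (1 - M_ii), where M = X (X'X + alpha I)^-1 X' is the same
# hat-matrix formula as in _gcv above. The helper below verifies this identity
# by brute force on a small problem; _check_loo_identity() should return True.
def _check_loo_identity(n=20, p=5, alpha=1.0, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(n, p)
    y = rng.randn(n)
    M = np.dot(np.dot(X, np.linalg.inv(np.dot(X.T, X) + alpha * np.eye(p))),
               X.T)
    shortcut = (y - np.dot(M, y)) / (1 - np.diag(M))
    brute = np.empty(n)
    for i in range(n):
        mask = np.arange(n) != i
        clf = linear_model.Ridge(alpha=alpha, fit_intercept=False)
        clf.fit(X[mask], y[mask])
        brute[i] = y[i] - clf.predict(X[i:i + 1])[0]
    return np.allclose(shortcut, brute)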
# set the parameters
alphas = np.logspace(-2, 6, 9)
n1, p = 100, 200
# n1: number of training samples
# p: number of features
np.random.seed([2])
# generate the data
y, X, beta = (np.random.randn(n1), np.random.randn(n1, p), np.random.randn(p))
beta *= .1 # control the SNR
y += np.dot(X, beta)
alpha, _ = _gcv(X, y, alphas)
# try with sklearn's GCV
gcv1 = linear_model.RidgeCV(alphas=alphas).fit(X, y)
print('sklearn alpha: ', gcv1.alpha_, 'local implementation: ', alpha)
gcv2 = linear_model.RidgeCV(alphas=[alpha]).fit(X, y)
# our estimate is actually much better than scikit-learn's one
print('error on beta: %f %f' % (np.sum((gcv1.coef_ - beta) ** 2),
                                np.sum((gcv2.coef_ - beta) ** 2)))
# this can easily be fixed by not applying the centering of the matrix X
# sklearn.linear_model.ridge, line 564
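
# Hedged workaround sketch (not in the original gist): fitting RidgeCV without
# an intercept should skip the internal centering of X, so the p > n design
# keeps its full row rank; this assumes no intercept is needed, as in this
# example where the data are generated without one.
gcv3 = linear_model.RidgeCV(alphas=alphas, fit_intercept=False).fit(X, y)
print('alpha without centering: ', gcv3.alpha_,
      'error on beta: %f' % np.sum((gcv3.coef_ - beta) ** 2))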