Created
July 13, 2014 17:07
-
-
Save eickenberg/e9eb106f12e40d017fd0 to your computer and use it in GitHub Desktop.
Ridge Path sketches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
# X.dot(X.T) + alpha * eye = U.dot(np.diag(S ** 2 + alpha)).dot(U.T)
# X.T.dot(inv(X.dot(X.T) + alpha * eye)) = V.dot(np.diag(S / (S ** 2 + alpha))).dot(U.T)
def _linear_kernel_ridge_path_svd( | |
U_train, S_train, VT_train, Y_train, alphas, X_test=None): | |
UTY = U_train.T.dot(Y_train) | |
if X_test is not None: | |
dekernelize = X_test.dot(VT_train.T) | |
else: | |
dekernelize = VT_train.T | |
S_and_alpha = S_train[:, np.newaxis] / ( | |
S_train[np.newaxis, :, np.newaxis] ** 2 + alphas[:, np.newaxis]) | |
# this should be done in place | |
S_alpha_UTY = S_and_alpha * UTY[np.newaxis] | |
output = dekernelize.dot(S_alpha_UTY).transpose(1, 0, 2) | |
if X_test is None: | |
return output, None | |
else: | |
return None, output | |
def linear_kernel_ridge_path_svd(X_train, Y_train, alphas, X_test=None):
    """Solve ridge regression along a path of alphas via a thin SVD.

    Returns ``(coef, predictions)`` as produced by
    ``_linear_kernel_ridge_path_svd``.
    """
    left, singular_values, right_t = np.linalg.svd(
        X_train, full_matrices=False)
    return _linear_kernel_ridge_path_svd(
        left, singular_values, right_t, Y_train, alphas, X_test)
def _precomp_kernel_path_eigen( | |
v_train, V_train, Y_train, alphas, gramX_test=None): | |
VTY = V_train.T.dot(Y_train) | |
v_alpha = 1. / (v_train[np.newaxis, :, np.newaxis] + | |
alphas[:, np.newaxis]) | |
# should probably be done inplace | |
v_alpha_VTY = v_alpha * VTY[np.newaxis] | |
if gramX_test is None: | |
# dual coef | |
dual_coef = V_train.dot(v_alpha_VTY).transpose(1, 0, 2) | |
predictions = None | |
else: | |
dual_coef = None | |
predictions = (gramX_test.dot(V_train)).dot( | |
v_alpha_VTY).transpose(1, 0, 2) | |
return dual_coef, predictions | |
def precomp_kernel_path_eigen(gramX_train, Y_train, alphas, gramX_test=None):
    """Kernel ridge path for a precomputed (symmetric) Gram matrix."""
    eigenvalues, eigenvectors = np.linalg.eigh(gramX_train)
    return _precomp_kernel_path_eigen(
        eigenvalues, eigenvectors, Y_train, alphas, gramX_test)
def _linear_kernel(X, Y): | |
return X.dot(Y.T) | |
def kernel_ridge_path_eigen(X_train, Y_train, alphas, X_test=None,
                            kernel=_linear_kernel):
    """Kernel ridge path on raw data.

    Returns the dual coefficients per alpha when ``X_test`` is None,
    otherwise the test-set predictions per alpha.
    """
    train_gram = kernel(X_train, X_train)
    if X_test is None:
        dual_coef, _ = precomp_kernel_path_eigen(train_gram, Y_train, alphas)
        return dual_coef
    test_gram = kernel(X_test, X_train)
    _, predictions = precomp_kernel_path_eigen(
        train_gram, Y_train, alphas, test_gram)
    return predictions
def _precomp_kernel_path_looe(v_train, V_train, Y, alphas): | |
VTY = V_train.T.dot(Y) | |
v_alpha = 1. / (v_train[np.newaxis, :, np.newaxis] + | |
alphas[:, np.newaxis]) | |
# should probably be done inplace | |
v_alpha_VTY = v_alpha * VTY[np.newaxis] | |
dual_coef = V_train.dot(v_alpha_VTY).transpose(1, 0, 2) | |
diag_rescale = (V_train ** 2).dot(v_alpha).transpose(1, 0, 2) | |
return dual_coef / diag_rescale | |
def precomp_kernel_ridge_path_looe(gramX, Y, alphas):
    """Leave-one-out errors along the alpha path for a precomputed Gram."""
    eigenvalues, eigenvectors = np.linalg.eigh(gramX)
    return _precomp_kernel_path_looe(eigenvalues, eigenvectors, Y, alphas)
def kernel_ridge_path_looe(X, Y, alphas, kernel=_linear_kernel):
    """Leave-one-out errors along the alpha path for raw input data."""
    return precomp_kernel_ridge_path_looe(kernel(X, X), Y, alphas)
def kernel_ridge_path_loov(X, Y, alphas, kernel=_linear_kernel):
    """Leave-one-out predicted values: observed targets minus LOO errors."""
    loo_errors = kernel_ridge_path_looe(X, Y, alphas, kernel)
    return Y[np.newaxis] - loo_errors
from numpy.testing import assert_array_almost_equal
from sklearn.utils import check_random_state
def make_dataset(
        n_samples=100,
        n_features=200,
        n_targets=10,
        train_frac=.8,
        random_state=42):
    """Draw a random linear regression problem with per-target noise.

    Returns ``(X, Y, W, train, test)`` where ``train`` / ``test`` are
    slices into the sample axis splitting off a ``train_frac`` fraction.
    """
    rng = check_random_state(random_state)
    # RNG draws happen in a fixed order so random_state stays reproducible.
    X = rng.randn(n_samples, n_features)
    W = rng.randn(n_features, n_targets)
    Y_clean = X.dot(W)
    noise_levels = rng.randn(n_targets) ** 2
    noise = rng.randn(*Y_clean.shape) * noise_levels * Y_clean.std(0)
    Y = Y_clean + noise
    n_train = int(train_frac * n_samples)
    train = slice(None, n_train)
    test = slice(n_train, None)
    return X, Y, W, train, test
def test_linear_kernel_ridge_path_svd():
    """Check the SVD ridge path against sklearn's SVD ridge solver."""
    # Removed unused n_samples / n_features locals; the dataset uses
    # make_dataset's defaults (100 samples, 200 features, 10 targets).
    n_targets = 10
    rng = np.random.RandomState(42)
    X, Y, W, train, test = make_dataset()
    from sklearn.linear_model import Ridge
    # One positive penalty per target at each of the 9 path points.
    alphas = np.logspace(-3, 3, 9)[:, np.newaxis] * rng.randn(n_targets) ** 2
    predictions_sklearn = [
        Ridge(solver="svd", alpha=alpha, fit_intercept=False).fit(
            X[train], Y[train]).predict(X[test])
        for alpha in alphas]
    predictions_ridge_path = linear_kernel_ridge_path_svd(
        X[train], Y[train], alphas=alphas, X_test=X[test])[1]
    assert_array_almost_equal(predictions_sklearn, predictions_ridge_path)
def test_kernel_ridge_path_eigen():
    """Check the eigendecomposition kernel ridge path against sklearn."""
    # Removed unused n_samples / n_features locals. NOTE(review): the
    # dataset has make_dataset's default 10 targets while n_targets = 1,
    # so alphas has a single column that appears to be shared across all
    # targets by broadcasting — confirm this is the intended coverage.
    n_targets = 1
    rng = np.random.RandomState(42)
    X, Y, W, train, test = make_dataset()
    from sklearn.linear_model import Ridge
    alphas = np.logspace(-3, 3, 9)[:, np.newaxis] * rng.randn(n_targets) ** 2
    predictions_sklearn = [
        Ridge(solver="svd", alpha=alpha, fit_intercept=False).fit(
            X[train], Y[train]).predict(X[test])
        for alpha in alphas]
    predictions_ridge_path = kernel_ridge_path_eigen(
        X[train], Y[train], alphas=alphas, X_test=X[test])
    assert_array_almost_equal(predictions_sklearn, predictions_ridge_path)
def test_kernel_ridge_path_looe():
    """Compare vectorized LOO errors to sklearn's private _RidgeGCV errors."""
    n_samples, n_features, n_targets, n_alphas = 100, 200, 10, 9
    rng = np.random.RandomState(42)
    X, Y, W, train, test = make_dataset(n_samples, n_features, n_targets)
    # NOTE(review): Ridge looks unused in this test — verify before removal.
    from sklearn.linear_model import Ridge
    # One positive penalty per target at each of the n_alphas path points.
    alphas = np.logspace(-3, 3, n_alphas
                         )[:, np.newaxis] * rng.randn(n_targets) ** 2
    from sklearn.linear_model.ridge import _RidgeGCV
    # Reference errors, filled one (target, alpha) pair at a time below.
    errors = np.zeros((len(alphas),) + Y.shape)
    # Prepare some variable to be able to call into private api
    v, Q = np.linalg.eigh(X.dot(X.T))
    QTY = Q.T.dot(Y)
    # alphas passed to the constructor are irrelevant; _errors gets alpha
    # explicitly on each call.
    r = _RidgeGCV(alphas=[1.], gcv_mode="eigen")
    # Loop over targets
    for err, QTy, y, alphas_target in zip(
            np.rollaxis(errors, 2), QTY.T, Y.T, alphas.T):
        for alpha, err_alpha in zip(alphas_target, err):
            err_alpha[:] = r._errors(alpha, y, v, Q, QTy)[0]
    looe_ridge_path = kernel_ridge_path_looe(X, Y, alphas)
    # The path solver returns signed LOO errors; _RidgeGCV._errors returns
    # squared ones (as used here), hence the ** 2 in the comparison.
    assert_array_almost_equal(looe_ridge_path ** 2, errors)
if __name__ == "__main__":
    # Run all consistency checks against scikit-learn when executed as a
    # script (each raises on mismatch via assert_array_almost_equal).
    test_linear_kernel_ridge_path_svd()
    test_kernel_ridge_path_eigen()
    test_kernel_ridge_path_looe()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment