Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Created February 19, 2021 15:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogrisel/ea98add276dc9624fea8e41a5bb6c989 to your computer and use it in GitHub Desktop.
Save ogrisel/ea98add276dc9624fea8e41a5bb6c989 to your computer and use it in GitHub Desktop.
# %%
import numpy as np
from numpy.testing import assert_allclose
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from scipy.optimize import fmin_l_bfgs_b, check_grad
np.random.seed(0)
n_samples = 100
X, y = make_regression(n_samples=n_samples, n_features=300)
X_train, X_test, y_train, y_test = train_test_split(X, y)
# Check that setting the sw to 0 for the first 10 samples
# is equivalent to removing them:
sw = 10 * np.ones(X_train.shape[0])
sw[:10] = 0.
alpha = 1e-3
def ridge_bfgs(X, y, sample_weight, alpha, X_test):
def f(w):
coef, intercept = w[:-1], w[-1]
return (
0.5 * np.sum(sample_weight * (y - X @ coef - intercept) ** 2) +
0.5 * alpha * np.sum(coef ** 2)
)
def fprime(w):
coef, intercept = w[:-1], w[-1]
residual = y - X @ coef - intercept
grad = np.empty(X.shape[1] + 1)
grad[:-1] = -X.T @ (sample_weight * residual) + alpha * coef
grad[-1] = -np.sum(sample_weight * residual)
return grad
dim = X.shape[1] + 1
print(check_grad(f, fprime, np.random.RandomState(0).randn(dim)))
w0 = np.zeros(dim)
w0[-1] = np.average(y, weights=sample_weight)
w = fmin_l_bfgs_b(f, w0, fprime, iprint=10)[0]
coef, intercept = w[:-1], w[-1]
y_pred = X_test @ coef + intercept
return y_pred
y_pred = ridge_bfgs(X_train, y_train, sw, alpha, X_test)
y_pred_trimmed = ridge_bfgs(X_train[10:], y_train[10:], sw[10:], alpha, X_test)
assert_allclose(
y_pred,
y_pred_trimmed,
rtol=1e-5
)
y_pred_sklearn = Ridge(
alpha=alpha, normalize=False, # solver="svd",
).fit(
X_train, y_train, sample_weight=sw
).predict(X_test)
assert_allclose(
y_pred,
y_pred_sklearn,
rtol=1e-5
)
y_pred_2x = ridge_bfgs(X_train, y_train, sw * 2, alpha, X_test)
y_pred_duplicated = ridge_bfgs(
np.concatenate([X_train, X_train]),
np.concatenate([y_train, y_train]),
np.concatenate([sw, sw]),
alpha, X_test)
assert_allclose(
y_pred_2x,
y_pred_duplicated,
rtol=1e-5
)
y_pred_sklearn_2x = Ridge(
alpha=alpha, normalize=False, # solver="svd",
).fit(
X_train, y_train, sample_weight=2 * sw
).predict(X_test)
assert_allclose(
y_pred_2x,
y_pred_sklearn_2x,
rtol=1e-5
)
@ogrisel
Copy link
Author

ogrisel commented Feb 19, 2021

Here is a sample output (on scikit-learn main):

2.3773271663547337
1.4133899312192566
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
~/ridge.py in 
     61 ).predict(X_test)
     62 
---> 63 assert_allclose(
     64     y_pred,
     65     y_pred_sklearn,

    [... skipping hidden 1 frame]

~/miniforge3/envs/dev/lib/python3.9/site-packages/numpy/testing/_private/utils.py in assert_array_compare(comparison, x, y, err_msg, verbose, header, precision, equal_nan, equal_inf)
    840                                 verbose=verbose, header=header,
    841                                 names=('x', 'y'), precision=precision)
--> 842             raise AssertionError(msg)
    843     except ValueError:
    844         import traceback

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 25 / 25 (100%)
Max absolute difference: 6.03654103
Max relative difference: 2.87084231
 x: array([  -2.87583 ,  -25.284264,   28.037629,  -26.2939  ,  -17.915856,
         -3.740465,  -23.20694 ,  107.58785 , -102.516502, -187.785853,
        105.010522, -159.983075,   -9.83612 ,   78.818349, -118.147498,...
 y: array([   1.537185,  -23.192361,   31.94037 ,  -22.304428,  -11.879315,
         -2.363851,  -20.120742,  111.605058,  -98.727673, -186.442948,
        105.06974 , -155.375173,   -4.925645,   81.9221  , -112.592249,...

The check_grad value seems high but I cannot see the problem in the gradient expression.

@ogrisel
Copy link
Author

ogrisel commented Feb 19, 2021

on the 19426 branch, I get the same results (the normalize=False case is apparently not impacted by the PR):

2.3773271663547337
1.4133899312192566
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
~/ridge.py in 
     61 ).predict(X_test)
     62 
---> 63 assert_allclose(
     64     y_pred,
     65     y_pred_sklearn,

    [... skipping hidden 1 frame]

~/miniforge3/envs/dev/lib/python3.9/site-packages/numpy/testing/_private/utils.py in assert_array_compare(comparison, x, y, err_msg, verbose, header, precision, equal_nan, equal_inf)
    840                                 verbose=verbose, header=header,
    841                                 names=('x', 'y'), precision=precision)
--> 842             raise AssertionError(msg)
    843     except ValueError:
    844         import traceback

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=0

Mismatched elements: 25 / 25 (100%)
Max absolute difference: 6.03654103
Max relative difference: 2.87084231
 x: array([  -2.87583 ,  -25.284264,   28.037629,  -26.2939  ,  -17.915856,
         -3.740465,  -23.20694 ,  107.58785 , -102.516502, -187.785853,
        105.010522, -159.983075,   -9.83612 ,   78.818349, -118.147498,...
 y: array([   1.537185,  -23.192361,   31.94037 ,  -22.304428,  -11.879315,
         -2.363851,  -20.120742,  111.605058,  -98.727673, -186.442948,
        105.06974 , -155.375173,   -4.925645,   81.9221  , -112.592249,...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment