# %%
import numpy as np
from numpy.random import sample
from numpy.testing import assert_allclose
from sklearn.preprocessing import scale
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from scipy.optimize import fmin_l_bfgs_b, check_grad
rng = np.random.RandomState(42)
n_samples = 200
X, y = make_regression(
    n_samples=n_samples,
    effective_rank=10,
    n_features=300,
    n_informative=50,
    random_state=0,
)
X = scale(X) # to be able to use normalize=False safely
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Check that setting the sw to 0 for the first samples is equivalent to
# removing them:
# sw = np.full(shape=X_train.shape[0], fill_value=.42)
sw = rng.uniform(low=0.1, high=2, size=X_train.shape[0])
cutoff = X.shape[0] // 3
sw[:cutoff] = 0.
X_train_trimmed = X_train[cutoff:]
y_train_trimmed = y_train[cutoff:]
sw_trimmed = sw[cutoff:]
alpha = 1.
def ridge_loss(coef, intercept, alpha, X, y, sample_weight=None):
    # Weighted ridge objective:
    # 0.5 * sum_i sw_i * (y_i - x_i @ coef - intercept)^2 + 0.5 * alpha * ||coef||^2
    # (the intercept is not penalized).
    if sample_weight is None:
        sample_weight = np.ones(shape=X.shape[0], dtype=X.dtype)
    return (
        0.5 * np.sum(sample_weight * (y - X @ coef - intercept) ** 2) +
        0.5 * alpha * np.sum(coef ** 2)
    )


def ridge_bfgs(X, y, alpha, sample_weight=None):
    if sample_weight is None:
        sample_weight = np.ones(shape=X.shape[0], dtype=X.dtype)

    def f(w):
        coef, intercept = w[:-1], w[-1]
        return ridge_loss(coef, intercept, alpha, X, y,
                          sample_weight=sample_weight)

    def fprime(w):
        # Analytical gradient of ridge_loss w.r.t. (coef, intercept).
        coef, intercept = w[:-1], w[-1]
        residual = y - X @ coef - intercept
        grad = np.empty(X.shape[1] + 1)
        grad[:-1] = -X.T @ (sample_weight * residual) + alpha * coef
        grad[-1] = -np.sum(sample_weight * residual)
        return grad

    dim = X.shape[1] + 1
    # print(
    #     "check_grad:",
    #     check_grad(f, fprime, np.random.RandomState(0).randn(dim)),
    # )
    w0 = np.zeros(dim)
    w0[-1] = np.average(y, weights=sample_weight)
    # Set factr to a lower value than the default to reach a higher
    # quality convergence.
    w = fmin_l_bfgs_b(f, w0, fprime, factr=10.)[0]
    coef, intercept = w[:-1], w[-1]
    return coef, intercept
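# %%
# Extra cross-check, not part of the original gist: the same weighted ridge
# problem (intercept unpenalized) can also be solved in closed form via the
# normal equations on weighted-centered data. The helper name
# `ridge_closed_form` is ours; this is a minimal sketch meant only to
# corroborate ridge_bfgs.
def ridge_closed_form(X, y, alpha, sample_weight=None):
    if sample_weight is None:
        sample_weight = np.ones(shape=X.shape[0], dtype=X.dtype)
    # Weighted centering lets the intercept absorb the offsets.
    x_mean = np.average(X, axis=0, weights=sample_weight)
    y_mean = np.average(y, weights=sample_weight)
    Xc, yc = X - x_mean, y - y_mean
    # Solve (Xc^T W Xc + alpha * I) coef = Xc^T W yc.
    XtWX = Xc.T @ (sample_weight[:, np.newaxis] * Xc)
    XtWy = Xc.T @ (sample_weight * yc)
    coef = np.linalg.solve(XtWX + alpha * np.eye(X.shape[1]), XtWy)
    intercept = y_mean - x_mean @ coef
    return coef, intercept


coef_cf, intercept_cf = ridge_closed_form(X_train, y_train, alpha, sw)
coef_lbfgs, intercept_lbfgs = ridge_bfgs(X_train, y_train, alpha, sw)
atol_cf = 1e-4 * np.median(np.abs(coef_cf))
assert_allclose(coef_lbfgs, coef_cf, atol=atol_cf)
assert_allclose(intercept_lbfgs, intercept_cf, atol=atol_cf)
print("ridge_bfgs matches the closed-form solution: ok")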
# %%
# Sanity check w.r.t. sample weight invariance properties of the ridge_bfgs
# implementation.
coef, intercept = ridge_bfgs(X_train, y_train, alpha, sw)
coef_trimmed, intercept_trimmed = ridge_bfgs(
    X_train_trimmed, y_train_trimmed, alpha, sw_trimmed)
assert_allclose(coef, coef_trimmed)
assert_allclose(intercept, intercept_trimmed)
coef_2sw, intercept_2sw = ridge_bfgs(X_train, y_train, alpha, 2 * sw)
coef_dup, intercept_dup = ridge_bfgs(
    np.concatenate([X_train, X_train]),
    np.concatenate([y_train, y_train]),
    alpha,
    np.concatenate([sw, sw]),
)
assert_allclose(coef_2sw, coef_dup)
assert_allclose(intercept_2sw, intercept_dup)
print("ridge_bfgs sample_weight invariance: ok")
# %%
# Check that we have converged to a minimum of the ridge_loss function.
def check_min(coef, intercept, alpha, X, y, sample_weight=None,
              noise_scale=1e-7, seed=0, n_checks=100):
    # Check that applying a small random perturbation to the fitted
    # parameters causes the loss to increase.
    if sample_weight is None:
        sample_weight = np.ones(shape=X.shape[0], dtype=X.dtype)
    rng = np.random.RandomState(seed)
    model_loss = ridge_loss(
        coef,
        intercept,
        alpha,
        X,
        y,
        sample_weight=sample_weight,
    )
    noise_scale_range = np.logspace(
        np.log10(noise_scale), np.log10(noise_scale) + 4, 4
    )[::-1]
    for ns in noise_scale_range:
        correct_checks = []
        for _ in range(n_checks):
            perturbation = ns * rng.normal(size=len(coef))
            perturbed_loss = ridge_loss(
                coef + perturbation,
                intercept + ns * rng.normal(size=1),
                alpha,
                X,
                y,
                sample_weight=sample_weight,
            )
            correct_checks.append(
                perturbed_loss > model_loss - np.finfo(X.dtype).eps
            )
        if np.mean(correct_checks) < 0.99:
            raise AssertionError(
                f"Only {np.sum(correct_checks)}/{n_checks} passed at "
                f"noise scale {ns:.3e}."
            )
check_min(coef, intercept, alpha, X_train, y_train, sw, noise_scale=1e-7)
check_min(coef_2sw, intercept_2sw, alpha, X_train, y_train, 2 * sw,
          noise_scale=1e-7)
# %%
# Check consistency with Ridge(normalize=False)
from sklearn.linear_model import Ridge
solver = "cholesky"
solver_tol = 1e-12 # only used for some solvers
ridge_sklearn = Ridge(
    alpha=alpha,
    normalize=False,
    solver=solver,
    tol=solver_tol,
).fit(X_train, y_train, sample_weight=sw)
check_min(ridge_sklearn.coef_, ridge_sklearn.intercept_, alpha,
          X_train, y_train, sw, noise_scale=1e-9)
atol = 1e-4 * np.median(np.abs(coef))
assert_allclose(ridge_sklearn.coef_, coef, atol=atol)
assert_allclose(ridge_sklearn.intercept_, intercept, atol=atol)
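# %%
# Optional extension, not in the original gist: repeat the consistency check
# with a few other Ridge solvers. They should all land on (nearly) the same
# minimizer of the weighted ridge loss; tolerances are reused from above.
for other_solver in ["svd", "lsqr", "sparse_cg"]:
    ridge_other = Ridge(
        alpha=alpha,
        normalize=False,
        solver=other_solver,
        tol=solver_tol,
    ).fit(X_train, y_train, sample_weight=sw)
    assert_allclose(ridge_other.coef_, coef, atol=atol)
    assert_allclose(ridge_other.intercept_, intercept, atol=atol)
print("other Ridge solvers consistent with ridge_bfgs: ok")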
# %%
# Check consistency with Ridge(normalize=False) when all sample weights are
# doubled.
ridge_sklearn_2sw = Ridge(
    alpha=alpha,
    normalize=False,
    solver=solver,
    tol=solver_tol,
).fit(X_train, y_train, sample_weight=2 * sw)
check_min(ridge_sklearn_2sw.coef_, ridge_sklearn_2sw.intercept_, alpha,
          X_train, y_train, 2 * sw, noise_scale=1e-9)
atol = 1e-4 * np.median(np.abs(coef_2sw))
assert_allclose(ridge_sklearn_2sw.coef_, coef_2sw, atol=atol)
assert_allclose(ridge_sklearn_2sw.intercept_, intercept_2sw, atol=atol)
# %%
# Large-ish scale benchmarks
rng = np.random.RandomState(42)
X_large, y_large = make_regression(
    n_samples=int(5e3),
    effective_rank=100,
    n_features=int(2e3),
    n_informative=50,
    random_state=0,
)
X_large = scale(X_large)
# %%
alpha = 1e-6
# %%
%%time
coef_large, intercept_large = ridge_bfgs(X_large, y_large, alpha)
# %%
check_min(coef_large, intercept_large,
          alpha, X_large, y_large, noise_scale=1e-6)
# %%
%%time
ridge_sklearn_large = Ridge(
    alpha=alpha,
    normalize=False,
    solver="cholesky",
    tol=solver_tol,
).fit(X_large, y_large)
# %%
check_min(ridge_sklearn_large.coef_, ridge_sklearn_large.intercept_,
          alpha, X_large, y_large, noise_scale=1e-12)
# %%
atol = 1e-2 * np.median(np.abs(coef_large))
assert_allclose(coef_large, ridge_sklearn_large.coef_, atol=atol)
# %%