Skip to content

Instantly share code, notes, and snippets.

@agramfort
Forked from jaquesgrobler/scale_c_ridge.py
Created May 2, 2012 17:26
Show Gist options
  • Save agramfort/2578458 to your computer and use it in GitHub Desktop.
Save agramfort/2578458 to your computer and use it in GitHub Desktop.
Ridge regression scaled
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
=========================================================
Scaling the Ridge regularization parameter
=========================================================

Grid-searches the ``alpha`` of a Ridge regressor on a synthetic dataset for
several training-set fractions, using shuffle-split cross-validation, and
plots the CV score against ``alpha`` divided by three candidate scale
factors: 1 (no scaling), 1/sqrt(n_train) and 1/n_train, where
n_train = n_samples * train_fraction.  A vertical line marks the
best-scoring value on every curve.
"""
# Parenthesised single-argument form: identical output on Python 2 and 3.
print(__doc__)
# Author: Andreas Mueller <amueller@ais.uni-bonn.de>
# Jaques Grobler <jaques.grobler@inria.fr>
# License: BSD
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.cross_validation import ShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.utils import check_random_state
from time import time
if __name__ == '__main__':
    # Demo script: grid-search Ridge's ``alpha`` for several training-set
    # fractions and plot the CV score against ``alpha`` rescaled three ways,
    # one subplot per scaling.
    rnd = check_random_state(1)

    # Synthetic, noise-free linear problem with more features than samples.
    n_samples = 100
    n_features = 1000
    X = rnd.randn(n_samples, n_features)
    coef = 5 * rnd.randn(n_features)
    # coef[10:] = 0  # optional: zero out all but the first 10 coefficients
    y = np.dot(X, coef)

    clf = Ridge(fit_intercept=False, tol=0.1)
    alphas = np.logspace(-2, 6, 30)
    param_grid = dict(alpha=alphas)  # same grid for every training fraction
    colors = ['b', 'g', 'r', 'c']  # one colour per training fraction
    train_fractions = np.arange(0.2, 0.6, 0.1)[::-1]

    t0_clf = time()
    plt.figure(figsize=(9, 10))
    plt.clf()

    for k, train_fraction in enumerate(train_fractions):
        t0 = time()
        # NOTE(review): ShuffleSplit(n=..., train_fraction=...,
        # n_iterations=...) and grid.grid_scores_ look like a legacy
        # scikit-learn API -- confirm against the installed release.
        cv = ShuffleSplit(n=n_samples, train_fraction=train_fraction,
                          n_iterations=100, random_state=1)
        grid = GridSearchCV(clf, n_jobs=-1, refit=False,
                            param_grid=param_grid, cv=cv)
        grid.fit(X, y)
        print("GridSearchCV done in %0.3fs." % (time() - t0))
        # Second field of each grid_scores_ entry; presumably the mean CV
        # score for that alpha -- verify against the scikit-learn docs.
        scores = [x[1] for x in grid.grid_scores_]
        print(clf)

        n_train = n_samples * train_fraction
        scales = [(1, 'No scaling'),
                  (1. / np.sqrt(n_train), '1/sqrt(n_samples)'),
                  (1. / n_train, '1/n_samples'),
                  ]

        for subplotnum, (scaler, name) in enumerate(scales):
            print('%s: Scaler: %s (1/%s)' % (subplotnum + 1, name, scaler))
            plt.subplot(3, 1, subplotnum + 1)
            # Label the subplot itself; labels set before plt.subplot() is
            # called do not end up on these axes.
            plt.xlabel('alpha / scaler')
            plt.ylabel('CV Score')
            grid_alphas = alphas / float(scaler)
            # Explicit colour keeps the curve and its marker line matched
            # regardless of matplotlib's default colour cycle.
            plt.semilogx(grid_alphas, scores, color=colors[k],
                         label="fraction %.2f" % train_fraction)
            ymin, ymax = plt.ylim()
            # Vertical line at the best-scoring (rescaled) alpha.
            plt.vlines(grid_alphas[np.argmax(scores)], ymin, ymax,
                       color=colors[k], linestyles='solid')
        print('-------------------------------')

    # Plot legend (applies to the current axes, i.e. the last subplot).
    plt.legend(loc="lower left")
    print("Total time taken: %0.3fs." % (time() - t0_clf))
    plt.show()
    print("Completed")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment