Skip to content

Instantly share code, notes, and snippets.

@GaelVaroquaux
Created October 14, 2015 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GaelVaroquaux/2465c9f3421a393b785f to your computer and use it in GitHub Desktop.
Save GaelVaroquaux/2465c9f3421a393b785f to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso
from sklearn.cross_validation import ShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.utils import check_random_state
from sklearn import datasets
rnd = check_random_state(1)
# set up dataset
n_samples = 200
n_features = 300
# l1 data (only 5 informative features)
X_1, y_1 = datasets.make_classification(n_samples=n_samples,
n_features=n_features, n_informative=5,
random_state=1)
# l2 data: non sparse, but less features
y_2 = np.sign(.5 - rnd.rand(n_samples))
X_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]
X_2 += 5 * rnd.randn(n_samples, n_features / 5)
clf_sets = [(Ridge(),
np.logspace(0, 4, 20), X_1, y_1),
(Lasso(),
np.logspace(-2, 1, 20), X_2, y_2)]
colors = ['b', 'g', 'r', 'c']
for fignum, (clf, cs, X, y) in enumerate(clf_sets):
# set up the plot for each regressor
plt.figure(fignum, figsize=(9, 10))
for k, train_size in enumerate(np.linspace(0.15, 0.7, 4)[::-1]):
param_grid = dict(alpha=cs)
# To get nice curve, we need a large number of iterations to
# reduce the variance
grid = GridSearchCV(clf, refit=False, param_grid=param_grid,
cv=ShuffleSplit(n=n_samples, train_size=train_size,
n_iter=250, random_state=1),
n_jobs=-1)
grid.fit(X, y)
scores = [x[1] for x in grid.grid_scores_]
scales = [(1, 'No scaling'),
((n_samples * train_size), 'n_samples'),
]
for subplotnum, (scaler, name) in enumerate(scales):
plt.subplot(2, 1, subplotnum + 1)
plt.xlabel('alpha')
plt.ylabel('CV Score')
grid_cs = cs * float(scaler) # scale the C's
plt.semilogx(grid_cs, scores, label="fraction %.2f" %
train_size)
plt.title('scaling=%s, %s' %
(name, clf.__class__.__name__))
plt.legend(loc="best")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment