Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Created December 13, 2010 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ogrisel/738527 to your computer and use it in GitHub Desktop.
Save ogrisel/738527 to your computer and use it in GitHub Desktop.
ElasticNet and whitening
"""Evaluating the impact of PCA + whitening on low rank data"""
import numpy as np
from pprint import pprint
from scikits.learn.datasets.samples_generator import make_regression_dataset
from scikits.learn.pca import PCA
from scikits.learn.linear_model import ElasticNetCV
# Options forwarded verbatim to make_regression_dataset: many more features
# (300) than the requested effective rank (20), i.e. the "low rank data" this
# script is about.
data_opts = {
    'n_train_samples': 5000,
    'n_test_samples': 1000,
    'n_features': 300,
    'n_informative': 100,
    'effective_rank': 20,
    'tail_strength': 0.01,
    'noise': 0.01,
}

# NOTE: every print below takes exactly one parenthesized argument so that it
# produces the same output under the Python 2 print statement and the
# Python 3 print function. Do not add a second comma-separated argument: on
# Python 2 that would print a tuple.
print("Generating a low rank dataset with options:")
pprint(data_opts)
print("")  # blank separator line (replaces the bare Python 2 ``print``)

# ``coef`` is returned by the generator but is not used by this script.
X_train, y_train, X_test, y_test, coef = make_regression_dataset(**data_opts)

# Shared ``eps`` argument handed to every ElasticNetCV fit in this script so
# that all runs are compared under the same setting.
# NOTE(review): presumably the alpha_min / alpha_max ratio of the
# regularization path -- confirm against the ElasticNetCV documentation.
eps = 0.2
def _fit_and_report(X_tr, X_te):
    """Fit an ElasticNetCV model and print its held-out metrics.

    The model is fitted on ``(X_tr, y_train)`` with the module-level ``eps``
    setting, then three lines are printed, followed by a blank line:

    - the value of ``clf.score(X_te, y_test)``,
    - the ``alpha`` attribute of the fitted model,
    - the number of non-zero entries in ``coef_``.

    Parameters
    ----------
    X_tr : training design matrix, row-aligned with the global ``y_train``.
    X_te : test design matrix, row-aligned with the global ``y_test``.

    Returns
    -------
    The fitted ElasticNetCV instance, so callers can keep inspecting it.
    """
    clf = ElasticNetCV(eps=eps).fit(X_tr, y_train)
    # Single-argument parenthesized prints: identical output with the
    # Python 2 print statement and the Python 3 print function.
    print("score: %0.3f" % clf.score(X_te, y_test))
    print("best alpha: %f" % clf.alpha)
    print("n predictors: %d" % np.sum(clf.coef_ != 0.0))
    print("")  # blank separator line between runs
    return clf


# Baseline: the model sees the raw features.
print("Evaluating ElasticNetCV without preprocessing")
clf_raw = _fit_and_report(X_train, X_test)

# PCA is fitted on the training set only and then applied to both splits.
print("Evaluating ElasticNetCV with PCA without whitening")
pca1 = PCA(whiten=False).fit(X_train)
X_train_pca1 = pca1.transform(X_train)
X_test_pca1 = pca1.transform(X_test)
clf_pca1 = _fit_and_report(X_train_pca1, X_test_pca1)

# Same pipeline as above, but with whitening enabled in the PCA transform.
print("Evaluating ElasticNetCV with PCA with whitening")
pca2 = PCA(whiten=True).fit(X_train)
X_train_pca2 = pca2.transform(X_train)
X_test_pca2 = pca2.transform(X_test)
clf_pca2 = _fit_and_report(X_train_pca2, X_test_pca2)
Generating a low rank dataset with options:
{'effective_rank': 20,
'n_features': 300,
'n_informative': 100,
'n_test_samples': 1000,
'n_train_samples': 5000,
'noise': 0.01,
'tail_strength': 0.01}
Evaluating ElasticNetCV without preprocessing
score: 0.056
best alpha: 0.000013
n predictors: 48
Evaluating ElasticNetCV with PCA without whitening
score: 0.120
best alpha: 0.000045
n predictors: 14
Evaluating ElasticNetCV with PCA with whitening
score: 0.221
best alpha: 0.003274
n predictors: 19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment