Skip to content

Instantly share code, notes, and snippets.

@agramfort
Created April 18, 2012 12:32
Show Gist options
  • Select an option

  • Save agramfort/2413309 to your computer and use it in GitHub Desktop.

Select an option

Save agramfort/2413309 to your computer and use it in GitHub Desktop.
bench lasso sklearn vs SPAMS
"""
Benchmarks of Lasso vs LassoLars vs SPAMS lasso.

In the first benchmark, we fix the number of features and increase the
number of samples. Then we plot the computation time as a function of
the number of samples.

In the second benchmark, we fix the number of samples and increase the
number of features. Then we plot the computation time as a function of
the number of features.

In both cases, only 10% of the features are informative.
"""
# author : Alexandre Gramfort
# license : BSD
import gc
from time import time
import numpy as np
from sklearn.datasets.samples_generator import make_regression
import spams
def compute_bench(alpha, n_samples, n_features, precompute):
    """Time Lasso, LassoLars and SPAMS lasso on synthetic regression data.

    One problem is generated with ``make_regression`` for every
    ``(ns, nf)`` pair drawn from ``n_samples`` x ``n_features``, with
    ``nf // 10`` informative features. The three solvers are then run in
    turn on that problem and the wall-clock duration of each is recorded.

    Parameters
    ----------
    alpha : float
        Regularization strength passed to ``Lasso`` and ``LassoLars``.
        SPAMS is called with ``lambda1 = alpha * ns``.
    n_samples : sequence of int
        Numbers of samples to benchmark (outer loop).
    n_features : sequence of int
        Numbers of features to benchmark (inner loop).
    precompute : bool
        Forwarded unchanged as ``precompute`` to both scikit-learn
        estimators. When truthy, ``X.T X`` and ``X.T Y`` are also built
        and handed to SPAMS as ``Q`` and ``q``; building them is part of
        the measured SPAMS time.

    Returns
    -------
    lasso_results, lars_lasso_results, spams_lasso_results : list of float
        Durations in seconds, one entry per ``(ns, nf)`` pair, in
        iteration order.
    """
    # Imported here so the function also works when this module is
    # imported rather than run as a script (the module-level import of
    # these names only happens under the ``__main__`` guard).
    from sklearn.linear_model import Lasso, LassoLars

    lasso_results = []
    spams_lasso_results = []
    lars_lasso_results = []

    # Every (ns, nf) combination is one iteration.
    n_iterations = len(n_samples) * len(n_features)
    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %s of %s' % (it, n_iterations))
            print('==================')

            n_informative = nf // 10
            X, Y = make_regression(n_samples=ns, n_features=nf,
                                   n_informative=n_informative,
                                   noise=0.1)
            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data
            # Column-major layout, requested once up front so all three
            # solvers receive the same array.
            X = np.asfortranarray(X)

            # Collect garbage before each timed section so a collection
            # triggered by earlier allocations is less likely to land
            # inside the measurement.
            gc.collect()
            print("- benching Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False,
                        precompute=precompute)
            tstart = time()
            clf.fit(X, Y)
            lasso_results.append(time() - tstart)

            gc.collect()
            print("- benching LassoLars")
            # normalize=False: the columns of X were already scaled above.
            clf = LassoLars(alpha=alpha, fit_intercept=False,
                            normalize=False, precompute=precompute)
            tstart = time()
            clf.fit(X, Y)
            lars_lasso_results.append(time() - tstart)

            # SPAMS is given the target as a 2-D, Fortran-ordered column.
            Y = np.asfortranarray(Y[:, None])

            gc.collect()
            print("- benching spams")
            tstart = time()
            Q = None
            q = None
            if precompute:
                Q = np.asfortranarray(np.dot(X.T, X))
                q = np.asfortranarray(np.dot(X.T, Y))
            # lambda1 is alpha scaled by the number of samples --
            # presumably to match scikit-learn's per-sample scaling of
            # the data-fit term; confirm against the SPAMS documentation.
            spams.lasso(Y, X, Q=Q, q=q, lambda1=alpha * ns)
            spams_lasso_results.append(time() - tstart)

    return lasso_results, lars_lasso_results, spams_lasso_results
if __name__ == '__main__':
    # Lasso and LassoLars are looked up as module globals by
    # compute_bench, so they are imported here before it is called.
    from sklearn.linear_model import Lasso, LassoLars
    import pylab as pl

    alpha = 0.01  # regularization parameter

    # Benchmark 1: fixed number of features, growing number of samples,
    # with the Gram matrix precomputed.
    n_features = 10
    # Builtin ``int`` instead of the ``np.int`` alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    list_n_samples = np.linspace(100, 1000000, 5).astype(int)
    lasso_results, lars_lasso_results, spams_lasso_results = \
        compute_bench(alpha, list_n_samples,
                      [n_features], precompute=True)

    pl.clf()
    pl.subplot(211)
    pl.plot(list_n_samples, lasso_results, 'b-',
            label='Lasso (with precomputed Gram matrix)')
    pl.plot(list_n_samples, lars_lasso_results, 'r-',
            label='LassoLars (with precomputed Gram matrix)')
    pl.plot(list_n_samples, spams_lasso_results, 'k-',
            label='SPAMS Lasso (with precomputed Gram matrix)')
    pl.title('Lasso benchmark (%d features - alpha=%s)' % (n_features, alpha))
    pl.legend(loc='upper left')
    pl.xlabel('number of samples')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')

    # Benchmark 2: fixed number of samples, growing number of features,
    # without a precomputed Gram matrix.
    n_samples = 2000
    list_n_features = np.linspace(500, 3000, 5).astype(int)
    lasso_results, lars_lasso_results, spams_lasso_results = \
        compute_bench(alpha, [n_samples],
                      list_n_features, precompute=False)

    pl.subplot(212)
    pl.plot(list_n_features, lasso_results, 'b-', label='Lasso')
    pl.plot(list_n_features, lars_lasso_results, 'r-', label='LassoLars')
    pl.plot(list_n_features, spams_lasso_results, 'k-', label='SPAMS Lasso')
    pl.title('Lasso benchmark (%d samples - alpha=%s)' % (n_samples, alpha))
    pl.legend(loc='upper left')
    pl.xlabel('number of features')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')
    pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment