Skip to content

Instantly share code, notes, and snippets.

@ogrisel
Last active August 29, 2015 14:21
Show Gist options
  • Save ogrisel/1bb0d62ce5500fce67f1 to your computer and use it in GitHub Desktop.
Save ogrisel/1bb0d62ce5500fce67f1 to your computer and use it in GitHub Desktop.
Simple benchmark to evaluate the impact of compilers on scikit-learn
# Benchmark script for a scikit-learn model that does not use
# BLAS / LAPACK routines
#
# Fits a GradientBoostingClassifier on a 10,000-sample training split of the
# covertype dataset, scores it on the held-out split, and prints the
# wall-clock duration of each step together with the accuracy.
from time import time

from sklearn.datasets import fetch_covtype
from sklearn.ensemble import GradientBoostingClassifier

try:
    # train_test_split lives in sklearn.model_selection since 0.18; the
    # sklearn.cross_validation module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fall back to the legacy location so old installations keep working.
    from sklearn.cross_validation import train_test_split

# Shared seed for the data split and the model so runs are comparable.
seed = 0

print("Getting covertype data...")
covtype = fetch_covtype()

# Select a small sub-set of the data as training set to make the
# benchmark fast enough to run
X_train, X_test, y_train, y_test = train_test_split(
    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

model = GradientBoostingClassifier(n_estimators=100, random_state=seed)

print("Fitting boosted trees on %d samples..."
      % X_train.shape[0])
t0 = time()
model.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

print("Predicting with boosted trees on %d samples..."
      % X_test.shape[0])
t0 = time()
# score() runs the prediction and compares against y_test in one call, so
# the timing below covers both.
accuracy = model.score(X_test, y_test)
print("done in %0.3fs" % (time() - t0))
print("classification accuracy: %0.3f" % accuracy)
# Benchmark script for a scikit-learn model that does use
# BLAS routines intensively via numpy and scipy
#
# Fits a LogisticRegressionCV (lbfgs solver) on a 10,000-sample training
# split of the covertype dataset, scores it on the held-out split, and
# prints the wall-clock duration of each step together with the accuracy.
from time import time

from sklearn.datasets import fetch_covtype
from sklearn.linear_model import LogisticRegressionCV

try:
    # train_test_split lives in sklearn.model_selection since 0.18; the
    # sklearn.cross_validation module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fall back to the legacy location so old installations keep working.
    from sklearn.cross_validation import train_test_split

# Seed for the train/test split so runs are comparable.
seed = 0

print("Getting covertype data...")
covtype = fetch_covtype()

# Select a small sub-set of the data as training set to make the
# benchmark fast enough to run
X_train, X_test, y_train, y_test = train_test_split(
    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

model = LogisticRegressionCV(solver='lbfgs')

print("Fitting warm-started LR models on %d samples..."
      % X_train.shape[0])
t0 = time()
model.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

print("Predicting with linear model on %d samples..."
      % X_test.shape[0])
t0 = time()
# score() runs the prediction and compares against y_test in one call, so
# the timing below covers both.
accuracy = model.score(X_test, y_test)
print("done in %0.3fs" % (time() - t0))
print("classification accuracy: %0.3f" % accuracy)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment