# ogrisel/bench_boosted_trees.py

## bench_boosted_trees.py
# Benchmark script for a scikit-learn model that does not use
# BLAS / LAPACK routines.
#
# Fits a GradientBoostingClassifier on a 10,000-sample subset of the
# covertype dataset, scores it on the held-out test split, and prints the
# wall-clock time of each step together with the test accuracy.
from time import time

from sklearn.datasets import fetch_covtype
from sklearn.ensemble import GradientBoostingClassifier

try:
    # Home of train_test_split since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only provide it in the cross_validation module,
    # which was deprecated in 0.18 and removed in 0.20.
    from sklearn.cross_validation import train_test_split

# Shared seed for both the train/test split and the model.
seed = 0

print("Getting covertype data...")
covtype = fetch_covtype()

# Select a small sub-set of the data as training set to make the
# benchmark fast enough to run
X_train, X_test, y_train, y_test = train_test_split(
    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

model = GradientBoostingClassifier(n_estimators=100, random_state=seed)

# Time the training step.
print("Fitting boosted trees on %d samples..."
      % X_train.shape[0])
t0 = time()
model.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

# Time the scoring step; score() predicts on X_test and compares the
# result against y_test.
print("Predicting with boosted trees on %d samples..."
      % X_test.shape[0])
t0 = time()
accuracy = model.score(X_test, y_test)
print("done in %0.3fs" % (time() - t0))
print("classification accuracy: %0.3f" % accuracy)

## bench_logistic_regression.py
# Benchmark script for a scikit-learn model that does use
# BLAS routines intensively via numpy and scipy.
#
# Fits a LogisticRegressionCV model on a 10,000-sample subset of the
# covertype dataset, scores it on the held-out test split, and prints the
# wall-clock time of each step together with the test accuracy.
from time import time

from sklearn.datasets import fetch_covtype
from sklearn.linear_model import LogisticRegressionCV

try:
    # Home of train_test_split since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only provide it in the cross_validation module,
    # which was deprecated in 0.18 and removed in 0.20.
    from sklearn.cross_validation import train_test_split

# Seed for the train/test split only; the model below is constructed
# without a random_state.
seed = 0

print("Getting covertype data...")
covtype = fetch_covtype()

# Select a small sub-set of the data as training set to make the
# benchmark fast enough to run
X_train, X_test, y_train, y_test = train_test_split(
    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

model = LogisticRegressionCV(solver='lbfgs')

# Time the training step.
print("Fitting warm-started LR models on %d samples..."
      % X_train.shape[0])
t0 = time()
model.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))

# Time the scoring step; score() predicts on X_test and compares the
# result against y_test.
print("Predicting with linear model on %d samples..."
      % X_test.shape[0])
t0 = time()
accuracy = model.score(X_test, y_test)
print("done in %0.3fs" % (time() - t0))
print("classification accuracy: %0.3f" % accuracy)
	# Benchmark script for a scikit-learn model that does not use
	# BLAS / LAPACK routines.
	#
	# Fits a GradientBoostingClassifier on a 10,000-sample subset of the
	# covertype dataset, scores it on the held-out test split, and prints
	# the wall-clock time of each step together with the test accuracy.
	#
	# NOTE(review): this tab-indented section repeats the boosted-trees
	# benchmark that appears earlier in the file; the leading tab looks
	# like a paste artifact -- confirm whether the copy should be kept.
	from time import time

	from sklearn.datasets import fetch_covtype
	from sklearn.ensemble import GradientBoostingClassifier

	try:
		# Home of train_test_split since scikit-learn 0.18.
		from sklearn.model_selection import train_test_split
	except ImportError:
		# Older releases only provide it in the cross_validation module,
		# which was deprecated in 0.18 and removed in 0.20.
		from sklearn.cross_validation import train_test_split

	# Shared seed for both the train/test split and the model.
	seed = 0

	print("Getting covertype data...")
	covtype = fetch_covtype()

	# Select a small sub-set of the data as training set to make the
	# benchmark fast enough to run
	X_train, X_test, y_train, y_test = train_test_split(
	    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

	model = GradientBoostingClassifier(n_estimators=100, random_state=seed)

	# Time the training step.
	print("Fitting boosted trees on %d samples..."
	      % X_train.shape[0])
	t0 = time()
	model.fit(X_train, y_train)
	print("done in %0.3fs" % (time() - t0))

	# Time the scoring step; score() predicts on X_test and compares the
	# result against y_test.
	print("Predicting with boosted trees on %d samples..."
	      % X_test.shape[0])
	t0 = time()
	accuracy = model.score(X_test, y_test)
	print("done in %0.3fs" % (time() - t0))
	print("classification accuracy: %0.3f" % accuracy)
	# Benchmark script for a scikit-learn model that does use
	# BLAS routines intensively via numpy and scipy.
	#
	# Fits a LogisticRegressionCV model on a 10,000-sample subset of the
	# covertype dataset, scores it on the held-out test split, and prints
	# the wall-clock time of each step together with the test accuracy.
	#
	# NOTE(review): this tab-indented section repeats the logistic
	# regression benchmark that appears earlier in the file; the leading
	# tab looks like a paste artifact -- confirm whether the copy should
	# be kept.
	from time import time

	from sklearn.datasets import fetch_covtype
	from sklearn.linear_model import LogisticRegressionCV

	try:
		# Home of train_test_split since scikit-learn 0.18.
		from sklearn.model_selection import train_test_split
	except ImportError:
		# Older releases only provide it in the cross_validation module,
		# which was deprecated in 0.18 and removed in 0.20.
		from sklearn.cross_validation import train_test_split

	# Seed for the train/test split only; the model below is constructed
	# without a random_state.
	seed = 0

	print("Getting covertype data...")
	covtype = fetch_covtype()

	# Select a small sub-set of the data as training set to make the
	# benchmark fast enough to run
	X_train, X_test, y_train, y_test = train_test_split(
	    covtype.data, covtype.target, train_size=int(1e4), random_state=seed)

	model = LogisticRegressionCV(solver='lbfgs')

	# Time the training step.
	print("Fitting warm-started LR models on %d samples..."
	      % X_train.shape[0])
	t0 = time()
	model.fit(X_train, y_train)
	print("done in %0.3fs" % (time() - t0))

	# Time the scoring step; score() predicts on X_test and compares the
	# result against y_test.
	print("Predicting with linear model on %d samples..."
	      % X_test.shape[0])
	t0 = time()
	accuracy = model.score(X_test, y_test)
	print("done in %0.3fs" % (time() - t0))
	print("classification accuracy: %0.3f" % accuracy)