Last active December 27, 2017 03:30
Benchmarks for learning rate updating schemes in MLP
import numpy as np
from time import time
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
h = .02  # step size in the mesh

# Classifiers compared on the toy datasets, keyed by the title shown above
# each decision-boundary panel.  All share the same architecture and
# regularisation so that only the optimiser differs.
# NOTE(review): `algorithm=` / 'l-bfgs' is the spelling this script was
# written against; released scikit-learn versions appear to call this
# parameter `solver` (with the value 'lbfgs') -- confirm against the
# installed version.
ESTIMATORS = {
    'Adam': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1),
    'Adam_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1, early_stopping=True),
    'l-bfgs': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='l-bfgs', learning_rate_init=0.01, verbose=1,
        tol=1e-4, random_state=1, early_stopping=False),
}

# Materialise both as lists: they are indexed with len() and iterated
# several times, and list(...) behaves the same on Python 2 and 3.
names = list(ESTIMATORS)
classifiers = list(ESTIMATORS.values())
def make_datasets(n_samples=100):
    """Build the three 2-D toy classification problems at a given size.

    Parameters
    ----------
    n_samples : int, default 100
        Number of points generated for each dataset.

    Returns
    -------
    list of (X, y) tuples
        In order: two moons, two concentric circles, and a noisy
        linearly separable problem.
    """
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1,
                               n_samples=n_samples)
    # Add uniform jitter so the two classes are no longer perfectly
    # separated by a straight line.
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)

    datasets = [make_moons(noise=0.3, random_state=0, n_samples=n_samples),
                make_circles(noise=0.2, factor=0.5, random_state=1,
                             n_samples=n_samples),
                linearly_separable]
    return datasets
figure = plt.figure(figsize=(27, 9))
i = 0  # running panel counter; i % n_rows is the grid row of the next panel

# Build every toy dataset at each sample size (100, 500, 900).
sample_sizes = range(100, 1000, 400)
datasets = []
for n_samples in sample_sizes:
    datasets += make_datasets(n_samples)

# Grid layout: one column per dataset; the first row shows the raw data and
# each following row shows one classifier.
n_rows = len(classifiers) + 1
n_cols = len(datasets)

for j, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = plt.subplot(n_rows, n_cols, i % n_rows * n_cols + j + 1)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points (semi-transparent so they can be told apart)
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
               alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_title(str(len(y)), fontsize=10)
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(n_rows, n_cols, i % n_rows * n_cols + j + 1)

        time_start = time()
        clf.fit(X_train, y_train)
        train_time = time() - time_start
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to
        # each point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(grid_points)
        else:
            Z = clf.predict_proba(grid_points)[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_title(name, fontsize=10)
        # Test accuracy in the lower-right corner, fit time (seconds) in the
        # lower-left corner; the leading zero is stripped to save space.
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        ax.text(xx.min() + .3, yy.min() + .3,
                ('%.3f' % train_time).lstrip('0'),
                size=15, horizontalalignment='left')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
# Nothing else displays or saves the figure, so show it explicitly.
plt.show()
"""
Benchmarking adam and lbfgs on the Boston dataset

Regression performance:

Regressor     train-time    test-time    test-score
---------------------------------------------------
adam             0.3896s      0.0003s        0.8606
l-bfgs           0.5861s      0.0003s        0.8689
adam-early       0.6177s      0.0004s        0.8750
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPRegressor
# import some data to play with
def load_data():
    """Load the Boston housing data as a standardized train/test split.

    Returns
    -------
    train_X, test_X, train_y, test_y
        An 80/20 split.  Features are standardized with statistics computed
        on the training part only and then applied to the test part.
    """
    dataset = datasets.load_boston()
    # All features are used.
    X = dataset.data
    y = dataset.target
    # No scaling before the split: fitting a scaler on the full data would
    # leak test-set statistics, and the scaler below re-standardizes anyway.
    # NOTE(review): random_state=1 mirrors the split used by the diabetes
    # benchmark in this file -- confirm.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
                                                        random_state=1)
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)
    return train_X, test_X, train_y, test_y
# Regressors under comparison, keyed by the name accepted on the command
# line and printed in the results table.
ESTIMATORS = {
    'adam': MLPRegressor(random_state=1,
                         hidden_layer_sizes=(100, 100)),
    'adam-early': MLPRegressor(random_state=1, early_stopping=True,
                               hidden_layer_sizes=(100, 100)),
    'l-bfgs': MLPRegressor(algorithm='l-bfgs', random_state=1,
                           hidden_layer_sizes=(100, 100)),
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    parser.add_argument('--estimators', nargs="+",
                        choices=list(ESTIMATORS) + ['all'], type=str,
                        default=['adam', 'adam-early', 'l-bfgs'],
                        help="list of classifiers to benchmark.")
    parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
                        help="Number of concurrently running workers for "
                             "models that support parallelism.")
    parser.add_argument('--random-seed', nargs="?", default=0, type=int,
                        help="Common seed used by random number generator.")
    args = vars(parser.parse_args())

    X_train, X_test, y_train, y_test = load_data()

    print("Dataset statistics:")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    # NOTE(review): for a regression target this is the number of distinct
    # target values, not a class count.
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
                                 X_train.shape[0], int(X_train.nbytes / 1e6)))
    print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
                                 X_test.shape[0], int(X_test.nbytes / 1e6)))

    print("Training Estimators")
    # `error` holds estimator.score(...) (higher is better); the name is
    # kept for symmetry with the classification benchmarks.
    error, train_time, test_time = {}, {}, {}
    if 'all' in args['estimators']:
        args['estimators'] = list(ESTIMATORS)

    for name in sorted(args["estimators"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Apply the common seed to every random_state-like parameter; this
        # overrides the value given when ESTIMATORS was built.
        estimator.set_params(**{p: args["random_seed"]
                                for p in estimator_params
                                if p.endswith("random_state")})

        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        # Only the duration of predict() matters here; the score below is
        # computed by the estimator itself.
        time_start = time()
        estimator.predict(X_test)
        test_time[name] = time() - time_start

        error[name] = estimator.score(X_test, y_test)
        print("done")  # terminate the "Training ... " progress line

    print("Regression performance:")
    print("{0: <23} {1: >10} {2: >11} {3: >12}"
          "".format("Regressor ", "train-time", "test-time", "test-score"))
    print("-" * 76)
    # Rows are listed from lowest to highest test score.
    for name in sorted(args["estimators"], key=error.get):
        print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
              "".format(name, train_time[name], test_time[name], error[name]))
"""
Benchmarking adam and lbfgs on the Diabetes dataset

Regression performance:

Regressor     train-time    test-time    test-score
---------------------------------------------------
adam-early       0.3612s      0.0002s        0.2961
adam             0.4856s      0.0003s        0.3538
l-bfgs           0.4855s      0.0003s        0.4170
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPRegressor
# import some data to play with
def load_data():
    """Load the diabetes data and return an 80/20 train/test split.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Exactly what ``train_test_split`` returns for a fixed
        ``random_state=1``.  No feature scaling is applied here.
    """
    dataset = datasets.load_diabetes()
    # All features are used.
    X = dataset.data
    y = dataset.target
    return train_test_split(X, y, test_size=0.2, random_state=1)
# Regressors under comparison, keyed by the name accepted on the command
# line and printed in the results table.
ESTIMATORS = {
    'adam': MLPRegressor(random_state=1,
                         hidden_layer_sizes=(100, 100)),
    'adam-early': MLPRegressor(random_state=1, early_stopping=True,
                               hidden_layer_sizes=(100, 100)),
    'l-bfgs': MLPRegressor(algorithm='l-bfgs', random_state=1,
                           hidden_layer_sizes=(100, 100)),
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    parser.add_argument('--estimators', nargs="+",
                        choices=list(ESTIMATORS) + ['all'], type=str,
                        default=['adam', 'adam-early', 'l-bfgs'],
                        help="list of classifiers to benchmark.")
    parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
                        help="Number of concurrently running workers for "
                             "models that support parallelism.")
    parser.add_argument('--random-seed', nargs="?", default=0, type=int,
                        help="Common seed used by random number generator.")
    args = vars(parser.parse_args())

    X_train, X_test, y_train, y_test = load_data()

    print("Dataset statistics:")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    # NOTE(review): for a regression target this is the number of distinct
    # target values, not a class count.
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
                                 X_train.shape[0], int(X_train.nbytes / 1e6)))
    print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
                                 X_test.shape[0], int(X_test.nbytes / 1e6)))

    print("Training Estimators")
    # `error` holds estimator.score(...) (higher is better); the name is
    # kept for symmetry with the classification benchmarks.
    error, train_time, test_time = {}, {}, {}
    if 'all' in args['estimators']:
        args['estimators'] = list(ESTIMATORS)

    for name in sorted(args["estimators"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Apply the common seed to every random_state-like parameter; this
        # overrides the value given when ESTIMATORS was built.
        estimator.set_params(**{p: args["random_seed"]
                                for p in estimator_params
                                if p.endswith("random_state")})

        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        # Only the duration of predict() matters here; the score below is
        # computed by the estimator itself.
        time_start = time()
        estimator.predict(X_test)
        test_time[name] = time() - time_start

        error[name] = estimator.score(X_test, y_test)
        print("done")  # terminate the "Training ... " progress line

    print("Regression performance:")
    print("{0: <23} {1: >10} {2: >11} {3: >12}"
          "".format("Regressor ", "train-time", "test-time", "test-score"))
    print("-" * 76)
    # Rows are listed from lowest to highest test score.
    for name in sorted(args["estimators"], key=error.get):
        print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
              "".format(name, train_time[name], test_time[name], error[name]))
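"""
Benchmarking adam and lbfgs on the Digits dataset

Classification performance:

Classifier    train-time    test-time    error-rate
---------------------------------------------------
adam             1.1049s      0.0010s        0.0167
l-bfgs           0.0910s      0.0008s        0.0306
adam-early       0.1354s      0.0009s        0.0528
"""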
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import zero_one_loss
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPClassifier
def load_data():
    """Load the digits data as a standardized train/test split.

    Returns
    -------
    train_X, test_X, train_y, test_y
        An 80/20 split.  Features are standardized with statistics computed
        on the training part only and then applied to the test part.
    """
    dataset = datasets.load_digits()
    # All features are used.
    X = dataset.data
    y = dataset.target
    # No scaling before the split: fitting a scaler on the full data would
    # leak test-set statistics, and the scaler below re-standardizes anyway.
    # NOTE(review): random_state=1 mirrors the split used by the diabetes
    # benchmark in this file -- confirm.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
                                                        random_state=1)
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)
    return train_X, test_X, train_y, test_y
# Classifiers under comparison, keyed by the name accepted on the command
# line and printed in the results table.
ESTIMATORS = {
    'adam': MLPClassifier(random_state=1),
    'adam-early': MLPClassifier(random_state=1, early_stopping=True),
    'l-bfgs': MLPClassifier(algorithm='l-bfgs', random_state=1),
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    parser.add_argument('--classifiers', nargs="+",
                        choices=list(ESTIMATORS) + ['all'], type=str,
                        default=['adam', 'adam-early', 'l-bfgs'],
                        help="list of classifiers to benchmark.")
    parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
                        help="Number of concurrently running workers for "
                             "models that support parallelism.")
    parser.add_argument('--random-seed', nargs="?", default=0, type=int,
                        help="Common seed used by random number generator.")
    args = vars(parser.parse_args())

    X_train, X_test, y_train, y_test = load_data()

    print("Dataset statistics:")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
                                 X_train.shape[0], int(X_train.nbytes / 1e6)))
    print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
                                 X_test.shape[0], int(X_test.nbytes / 1e6)))

    print("Training Classifiers")
    error, train_time, test_time = {}, {}, {}
    if 'all' in args['classifiers']:
        args['classifiers'] = list(ESTIMATORS)

    for name in sorted(args["classifiers"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Apply the common seed to every random_state-like parameter; this
        # overrides the value given when ESTIMATORS was built.
        estimator.set_params(**{p: args["random_seed"]
                                for p in estimator_params
                                if p.endswith("random_state")})

        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        time_start = time()
        y_pred = estimator.predict(X_test)
        test_time[name] = time() - time_start

        # Fraction of misclassified test samples.
        error[name] = zero_one_loss(y_test, y_pred)
        print("done")  # terminate the "Training ... " progress line

    print("Classification performance:")
    print("{0: <23} {1: >10} {2: >11} {3: >12}"
          "".format("Classifier ", "train-time", "test-time", "error-rate"))
    print("-" * 76)
    # Best (lowest error) first.
    for name in sorted(args["classifiers"], key=error.get):
        print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
              "".format(name, train_time[name], test_time[name], error[name]))
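"""
Benchmarking adam and lbfgs on the Iris dataset

Classification performance:

Classifier    train-time    test-time    error-rate
---------------------------------------------------
adam             0.1003s      0.0002s        0.0333
l-bfgs           0.0344s      0.0001s        0.0333
adam-early       0.0083s      0.0001s        0.5333
"""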
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import zero_one_loss
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPClassifier
def load_data():
    """Load the iris data as a standardized train/test split.

    Returns
    -------
    train_X, test_X, train_y, test_y
        An 80/20 split.  Features are standardized with statistics computed
        on the training part only and then applied to the test part.
    """
    dataset = datasets.load_iris()
    # All features are used.
    X = dataset.data
    y = dataset.target
    # No scaling before the split: fitting a scaler on the full data would
    # leak test-set statistics, and the scaler below re-standardizes anyway.
    # NOTE(review): random_state=1 mirrors the split used by the diabetes
    # benchmark in this file -- confirm.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
                                                        random_state=1)
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)
    return train_X, test_X, train_y, test_y
# Classifiers under comparison, keyed by the name accepted on the command
# line and printed in the results table.
ESTIMATORS = {
    'adam': MLPClassifier(random_state=1),
    'adam-early': MLPClassifier(random_state=1, early_stopping=True),
    'l-bfgs': MLPClassifier(algorithm='l-bfgs', random_state=1),
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    parser.add_argument('--classifiers', nargs="+",
                        choices=list(ESTIMATORS) + ['all'], type=str,
                        default=['adam', 'adam-early', 'l-bfgs'],
                        help="list of classifiers to benchmark.")
    parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
                        help="Number of concurrently running workers for "
                             "models that support parallelism.")
    parser.add_argument('--random-seed', nargs="?", default=0, type=int,
                        help="Common seed used by random number generator.")
    args = vars(parser.parse_args())

    X_train, X_test, y_train, y_test = load_data()

    print("Dataset statistics:")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
                                 X_train.shape[0], int(X_train.nbytes / 1e6)))
    print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
                                 X_test.shape[0], int(X_test.nbytes / 1e6)))

    print("Training Classifiers")
    error, train_time, test_time = {}, {}, {}
    if 'all' in args['classifiers']:
        args['classifiers'] = list(ESTIMATORS)

    for name in sorted(args["classifiers"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Apply the common seed to every random_state-like parameter; this
        # overrides the value given when ESTIMATORS was built.
        estimator.set_params(**{p: args["random_seed"]
                                for p in estimator_params
                                if p.endswith("random_state")})

        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        time_start = time()
        y_pred = estimator.predict(X_test)
        test_time[name] = time() - time_start

        # Fraction of misclassified test samples.
        error[name] = zero_one_loss(y_test, y_pred)
        print("done")  # terminate the "Training ... " progress line

    print("Classification performance:")
    print("{0: <23} {1: >10} {2: >11} {3: >12}"
          "".format("Classifier ", "train-time", "test-time", "error-rate"))
    print("-" * 76)
    # Best (lowest error) first.
    for name in sorted(args["classifiers"], key=error.get):
        print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
              "".format(name, train_time[name], test_time[name], error[name]))
Benchmarking MLP performance on the 20 Newsgroups dataset
Classification performance:
Classifier train-time test-time Accuracy
MLP_SGD_constant_no_momentum_early 61.9779s 0.1686s 0.0333
MLP_SGD_invscaling_nesterov 143.6211s 0.1713s 0.0506
MLP_SGD_invscaling_nesterov_early 212.4194s 0.1691s 0.0769
MLP_SGD_constant_no_momentum 6882.5231s 0.1800s 0.5842
MLP_SGD_constant_nesterov_early 1871.2481s 0.1793s 0.7302
MLP_SGD_adaptive_nesterov_early 2395.0412s 0.1822s 0.7382
MLP_SGD_constant_nesterov 5725.5833s 0.1799s 0.7649
MLP_SGD_adaptive_nesterov 6263.0604s 0.1733s 0.7678
MLP_SGD_constant_momentum 4174.8977s 0.1666s 0.7678
MLP_Adam 1395.2267s 0.1822s 0.8314
MLP_Adam_early 528.2558s 0.1718s 0.8330
with learning_rate_init=0.1 for sgd:
Classification performance:
Classifier train-time test-time Accuracy
MLP_SGD_invscaling_nesterov_early 96.1587s 0.1757s 0.1032
MLP_SGD_invscaling_nesterov 11964.9086s 0.2026s 0.1374
MLP_SGD_constant_no_momentum_early 326.6792s 0.2224s 0.1620
MLP_SGD_constant_nesterov_early 407.4799s 0.2217s 0.6190
MLP_SGD_adaptive_nesterov_early 2152.5739s 0.2308s 0.7338
MLP_SGD_constant_momentum 1169.3004s 0.2609s 0.7617
MLP_SGD_constant_nesterov 1634.3541s 0.2659s 0.7681
MLP_SGD_constant_no_momentum 5292.0535s 0.2432s 0.7747
MLP_SGD_adaptive_nesterov 2166.2128s 0.2393s 0.7781
MLP_Adam 1740.3218s 0.2305s 0.8314
MLP_Adam_early 636.6075s 0.1822s 0.8330
from __future__ import print_function, division
from time import time
import cPickle as pickle
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.metrics import accuracy_score
from sklearn.utils.validation import check_array
from sklearn.neural_network import MLPClassifier
def make_plots(loss, val_loss):
    """Draw the recorded curves on a 2x3 grid of panels and show the figure.

    The first panel overlays every run whose name does not end in
    ``'_early'``.  Each following panel is dedicated to one ``'_early'``
    run and shows its curve, its ``val_loss`` curve (labelled
    ``name + '_val'``) and, when available, the curve of the run with the
    same name minus the ``'_early'`` suffix.

    Parameters
    ----------
    loss : dict
        Maps estimator name to its sequence of loss values.
    val_loss : dict
        Maps estimator name to its validation curve; must contain every
        ``'_early'`` name present in ``loss``.
    """
    suffix = '_early'
    non_early = [name for name in loss if not name.endswith(suffix)]
    early = [name for name in loss if name.endswith(suffix)]

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    panels = axes.ravel()
    make_sub_plot({name: loss[name] for name in non_early}, panels[0])

    # zip() stops at the five remaining panels; extra early runs are not
    # drawn.
    for name, ax in zip(early, panels[1:]):
        curves = {}
        base = name[:-len(suffix)]
        # The matching full run may not have been benchmarked (e.g. only
        # an '_early' estimator was requested); skip it instead of raising
        # KeyError.
        if base in loss:
            curves[base] = loss[base]
        curves[name] = loss[name]
        curves[name + '_val'] = val_loss[name]
        make_sub_plot(curves, ax)

    # Nothing else displays or saves the figure, so show it explicitly.
    plt.show()
def make_sub_plot(loss, ax):
    """Plot every curve in ``loss`` on ``ax`` and attach a legend.

    Parameters
    ----------
    loss : dict
        Maps curve label to the sequence of values to plot.
    ax : matplotlib axes (or any object offering ``plot`` and ``legend``)
        Target panel.

    Six colour/line-style combinations are available; they are reused
    cyclically so that no curve is dropped when more than six are given.
    """
    from itertools import cycle

    plot_args = [{'c': 'red', 'linestyle': '-'},
                 {'c': 'green', 'linestyle': '-'},
                 {'c': 'blue', 'linestyle': '-'},
                 {'c': 'red', 'linestyle': '--'},
                 {'c': 'green', 'linestyle': '--'},
                 {'c': 'blue', 'linestyle': '--'}]

    labels = list(loss)
    handles = []
    for label, style in zip(labels, cycle(plot_args)):
        # ax.plot returns the list of line objects it created.
        handles.extend(ax.plot(loss[label], label=label, **style))

    # Crowded panels get the legend pushed further up so that it does not
    # cover the curves.
    if len(loss) > 3:
        anchor = (0.95, 1.30)
    else:
        anchor = (1.05, 1.20)
    # Handles and labels are both passed positionally; mixing a positional
    # handle list with a `labels=` keyword is not a supported call form.
    ax.legend(handles, labels, loc='center right',
              bbox_to_anchor=anchor, fontsize=11)
def _mlp(**params):
    """Return an MLPClassifier with the settings shared by every entry."""
    settings = dict(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
                    verbose=1, tol=1e-4, random_state=1)
    settings.update(params)
    return MLPClassifier(**settings)


def _sgd(**params):
    """Return an SGD-trained MLPClassifier with the common initial rate."""
    return _mlp(algorithm='sgd', learning_rate_init=0.01, **params)


# Optimiser configurations under comparison.  The key encodes
# <solver>_<learning-rate schedule>_<momentum flavour>[_early]; the
# '_early' variants enable early stopping.  Entries that do not pass
# `learning_rate` rely on the estimator's default schedule.
ESTIMATORS = {
    'MLP_SGD_constant_no_momentum': _sgd(
        momentum=0, nesterovs_momentum=False),
    'MLP_SGD_constant_no_momentum_early': _sgd(
        momentum=0, nesterovs_momentum=False, early_stopping=True),
    'MLP_SGD_constant_momentum': _sgd(
        momentum=0.9, nesterovs_momentum=False),
    'MLP_SGD_constant_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True),
    'MLP_SGD_constant_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, early_stopping=True),
    'MLP_SGD_invscaling_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='invscaling'),
    'MLP_SGD_invscaling_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='invscaling',
        early_stopping=True),
    'MLP_SGD_adaptive_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='adaptive'),
    'MLP_SGD_adaptive_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='adaptive',
        early_stopping=True),
    'MLP_Adam': _mlp(algorithm='adam', learning_rate_init=0.001),
    'MLP_Adam_early': _mlp(algorithm='adam', learning_rate_init=0.001,
                           early_stopping=True),
}
# Data
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    parser.add_argument('-e', '--estimators', nargs="+", required=True,
                        choices=list(ESTIMATORS) + ['all'])
    args = vars(parser.parse_args())

    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")
    # Keep the feature matrices sparse (CSR) and in single precision.
    X_train = check_array(data_train.data, dtype=np.float32,
                          accept_sparse="csr")
    X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
    y_train = data_train.target
    y_test = data_test.target

    print("20 newsgroups")
    print("X_train.shape = {0}".format(X_train.shape))
    print("X_train.format = {0}".format(X_train.format))
    print("X_train.dtype = {0}".format(X_train.dtype))
    # Fraction of stored (non-zero) entries; np.prod replaces the
    # deprecated alias np.product.
    print("X_train density = {0}"
          "".format(X_train.nnz / np.prod(X_train.shape)))
    print("y_train {0}".format(y_train.shape))
    print("X_test {0}".format(X_test.shape))
    print("X_test.format = {0}".format(X_test.format))
    print("X_test.dtype = {0}".format(X_test.dtype))
    print("y_test {0}".format(y_test.shape))

    print("Classifier Training")
    accuracy, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
    if 'all' in args['estimators']:
        args['estimators'] = list(ESTIMATORS)

    for name in sorted(args["estimators"]):
        clf = ESTIMATORS[name]
        try:
            clf.set_params(random_state=0)
        except (TypeError, ValueError):
            # Best effort: an estimator that rejects the seed is benchmarked
            # with the settings it was built with.
            pass

        print("Training %s ... " % name, end="")
        t0 = time()
        clf.fit(X_train, y_train)
        train_time[name] = time() - t0

        t0 = time()
        y_pred = clf.predict(X_test)
        test_time[name] = time() - t0

        accuracy[name] = accuracy_score(y_test, y_pred)
        loss_curve[name] = clf.loss_curve_
        # Absent on some runs, hence the empty-list fallback.
        val_curve[name] = getattr(clf, 'validation_scores_', [])
        print("done")  # terminate the "Training ... " progress line

    print("Classification performance:")
    # Header columns use the same widths as the rows below.
    print("%s %s %s %s" % ("Classifier".ljust(36),
                           "train-time".center(10),
                           "test-time".center(10),
                           "Accuracy".center(10)))
    print("-" * 67)
    # Rows are listed from lowest to highest accuracy.
    for name in sorted(accuracy, key=accuracy.get):
        print("%s %s %s %s" % (name.ljust(36),
                               ("%.4fs" % train_time[name]).center(10),
                               ("%.4fs" % test_time[name]).center(10),
                               ("%.4f" % accuracy[name]).center(10)))

    # Persist the curves so the plots can be regenerated without retraining.
    with open('loss_history_20news.pkl', 'wb') as f:
        pickle.dump(loss_curve, f)
    with open('val_loss_history_20news.pkl', 'wb') as f:
        pickle.dump(val_curve, f)

    make_plots(loss_curve, val_curve)
Benchmarking MLP performance on MNIST dataset
Classification performance:
Classifier train-time test-time error-rate
MLP_SGD_constant_momentum 105.07s 0.10s 0.0205
MLP_SGD_adaptive_nesterov 166.20s 0.10s 0.0213
MLP_SGD_constant_nesterov 123.11s 0.11s 0.0219
MLP_Adam 49.43s 0.26s 0.0224
MLP_SGD_constant_no_momentum 532.17s 0.11s 0.0231
MLP_Adam_early 19.61s 0.12s 0.0241
MLP_SGD_adaptive_nesterov_early 57.51s 0.11s 0.0251
MLP_SGD_constant_nesterov_early 29.66s 0.11s 0.0283
MLP_SGD_constant_no_momentum_early 95.10s 0.11s 0.0388
MLP_SGD_invscaling_nesterov 46.28s 0.14s 0.0785
MLP_SGD_invscaling_nesterov_early 17.27s 0.12s 0.0817
with learning_rate_init=0.1 for sgd:
Classification performance:
Classifier train-time test-time error-rate
MLP_SGD_constant_momentum 37.69s 0.13s 0.0170
MLP_SGD_constant_nesterov 48.36s 0.13s 0.0171
MLP_SGD_adaptive_nesterov 91.48s 0.13s 0.0171
MLP_SGD_adaptive_nesterov_early 57.50s 0.12s 0.0197
MLP_SGD_constant_no_momentum 112.04s 0.13s 0.0204
MLP_SGD_constant_nesterov_early 19.50s 0.14s 0.0213
MLP_Adam 55.58s 0.14s 0.0224
MLP_SGD_constant_no_momentum_early 39.07s 0.13s 0.0229
MLP_Adam_early 22.73s 0.13s 0.0241
MLP_SGD_invscaling_nesterov 107.90s 0.15s 0.0304
MLP_SGD_invscaling_nesterov_early 44.23s 0.16s 0.0345
from __future__ import print_function
import os
import cPickle as pickle
from time import time
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.datasets import get_data_home
from sklearn.externals.joblib import Memory
from sklearn.metrics import zero_one_loss
from sklearn.utils import check_array
from sklearn.neural_network import MLPClassifier
# Memoize the data extraction and memory map the resulting
# train / test splits in readonly mode
memory = Memory(os.path.join(get_data_home(), 'mnist_benchmark_data'),
                mmap_mode='r')


@memory.cache
def load_data(dtype=np.float32, order='F'):
    """Load the data, then cache and memmap the train/test split

    Parameters
    ----------
    dtype : numpy dtype, default np.float32
        Type the feature matrix is converted to.
    order : {'F', 'C'}, default 'F'
        Memory layout requested for the feature matrix.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The first 60000 samples form the training set and the remaining
        samples the test set; features are divided by 255.
    """
    # Load dataset
    print("Loading dataset...")
    # NOTE(review): fetch_mldata is not available in recent scikit-learn
    # releases -- confirm the installed version still provides it.
    data = fetch_mldata('MNIST original')
    X = check_array(data['data'], dtype=dtype, order=order)
    y = data["target"]

    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test
def make_plots(loss, val_loss):
    """Draw the recorded curves on a 2x3 grid of panels and show the figure.

    The first panel overlays every run whose name does not end in
    ``'_early'``, except ``'MLP_SGD_constant_no_momentum'``.  Each
    following panel is dedicated to one ``'_early'`` run and shows its
    curve, its ``val_loss`` curve (labelled ``name + '_val'``) and, when
    available, the curve of the run with the same name minus the
    ``'_early'`` suffix.

    Parameters
    ----------
    loss : dict
        Maps estimator name to its sequence of loss values.
    val_loss : dict
        Maps estimator name to its validation curve; must contain every
        ``'_early'`` name present in ``loss``.
    """
    suffix = '_early'
    non_early = [name for name in loss if not name.endswith(suffix)]
    early = [name for name in loss if name.endswith(suffix)]

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    panels = axes.ravel()
    # not including MLP_SGD_constant_no_momentum because the number of
    # iterations is too large
    make_sub_plot({name: loss[name] for name in non_early
                   if name != 'MLP_SGD_constant_no_momentum'}, panels[0])

    # zip() stops at the five remaining panels; extra early runs are not
    # drawn.
    for name, ax in zip(early, panels[1:]):
        curves = {}
        base = name[:-len(suffix)]
        # The matching full run may not have been benchmarked (e.g. only
        # an '_early' classifier was requested); skip it instead of raising
        # KeyError.
        if base in loss:
            curves[base] = loss[base]
        curves[name] = loss[name]
        curves[name + '_val'] = val_loss[name]
        make_sub_plot(curves, ax)

    # Nothing else displays or saves the figure, so show it explicitly.
    plt.show()
def make_sub_plot(loss, ax):
    """Plot every curve in ``loss`` on ``ax`` and attach a legend.

    Parameters
    ----------
    loss : dict
        Maps curve label to the sequence of values to plot.
    ax : matplotlib axes (or any object offering ``plot`` and ``legend``)
        Target panel.

    Six colour/line-style combinations are available; they are reused
    cyclically so that no curve is dropped when more than six are given.
    """
    from itertools import cycle

    plot_args = [{'c': 'red', 'linestyle': '-'},
                 {'c': 'green', 'linestyle': '-'},
                 {'c': 'blue', 'linestyle': '-'},
                 {'c': 'red', 'linestyle': '--'},
                 {'c': 'green', 'linestyle': '--'},
                 {'c': 'blue', 'linestyle': '--'}]

    labels = list(loss)
    handles = []
    for label, style in zip(labels, cycle(plot_args)):
        # ax.plot returns the list of line objects it created.
        handles.extend(ax.plot(loss[label], label=label, **style))

    # Crowded panels get the legend pushed further up so that it does not
    # cover the curves.
    if len(loss) > 3:
        anchor = (0.95, 1.30)
    else:
        anchor = (1.05, 1.20)
    # Handles and labels are both passed positionally; mixing a positional
    # handle list with a `labels=` keyword is not a supported call form.
    ax.legend(handles, labels, loc='center right',
              bbox_to_anchor=anchor, fontsize=11)
def _mlp(**params):
    """Return an MLPClassifier with the settings shared by every entry."""
    settings = dict(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
                    verbose=1, tol=1e-4, random_state=1)
    settings.update(params)
    return MLPClassifier(**settings)


def _sgd(**params):
    """Return an SGD-trained MLPClassifier with the common initial rate."""
    return _mlp(algorithm='sgd', learning_rate_init=0.01, **params)


# Optimiser configurations under comparison.  The key encodes
# <solver>_<learning-rate schedule>_<momentum flavour>[_early]; the
# '_early' variants enable early stopping.  Entries that do not pass
# `learning_rate` rely on the estimator's default schedule.
ESTIMATORS = {
    'MLP_SGD_constant_no_momentum': _sgd(
        momentum=0, nesterovs_momentum=False),
    'MLP_SGD_constant_no_momentum_early': _sgd(
        momentum=0, nesterovs_momentum=False, early_stopping=True),
    'MLP_SGD_constant_momentum': _sgd(
        momentum=0.9, nesterovs_momentum=False),
    'MLP_SGD_constant_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True),
    'MLP_SGD_constant_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, early_stopping=True),
    'MLP_SGD_invscaling_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='invscaling'),
    'MLP_SGD_invscaling_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='invscaling',
        early_stopping=True),
    'MLP_SGD_adaptive_nesterov': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='adaptive'),
    'MLP_SGD_adaptive_nesterov_early': _sgd(
        momentum=0.9, nesterovs_momentum=True, learning_rate='adaptive',
        early_stopping=True),
    'MLP_Adam': _mlp(algorithm='adam', learning_rate_init=0.001),
    'MLP_Adam_early': _mlp(algorithm='adam', learning_rate_init=0.001,
                           early_stopping=True),
}
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # list(ESTIMATORS) rather than ESTIMATORS.keys(): a dict view cannot be
    # concatenated with a list on Python 3.
    # NOTE(review): the original default list is truncated in this copy of
    # the script (only 'MLP_Adam', 'MLP_Adam_early' survive); defaulting to
    # every estimator is an assumption -- confirm.
    parser.add_argument('--classifiers', nargs="+",
                        choices=list(ESTIMATORS) + ['all'], type=str,
                        default=sorted(ESTIMATORS),
                        help="list of classifiers to benchmark.")
    parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
                        help="Number of concurrently running workers for "
                             "models that support parallelism.")
    parser.add_argument('--order', nargs="?", default="C", type=str,
                        choices=["F", "C"],
                        help="Allow to choose between fortran and C ordered "
                             "data")
    parser.add_argument('--random-seed', nargs="?", default=0, type=int,
                        help="Common seed used by random number generator.")
    args = vars(parser.parse_args())

    X_train, X_test, y_train, y_test = load_data(order=args["order"])

    print("Dataset statistics:")
    print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
    print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
    print("%s %s" % ("data type:".ljust(25), X_train.dtype))
    print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
                                 X_train.shape[0], int(X_train.nbytes / 1e6)))
    print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
                                 X_test.shape[0], int(X_test.nbytes / 1e6)))

    print("Training Classifiers")
    error, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
    if 'all' in args['classifiers']:
        args['classifiers'] = list(ESTIMATORS)

    for name in sorted(args["classifiers"]):
        print("Training %s ... " % name, end="")
        estimator = ESTIMATORS[name]
        estimator_params = estimator.get_params()

        # Apply the common seed to every random_state-like parameter; this
        # overrides the value given when ESTIMATORS was built.
        estimator.set_params(**{p: args["random_seed"]
                                for p in estimator_params
                                if p.endswith("random_state")})

        if "n_jobs" in estimator_params:
            estimator.set_params(n_jobs=args["n_jobs"])

        time_start = time()
        estimator.fit(X_train, y_train)
        train_time[name] = time() - time_start

        time_start = time()
        y_pred = estimator.predict(X_test)
        test_time[name] = time() - time_start

        # Fraction of misclassified test samples.
        error[name] = zero_one_loss(y_test, y_pred)
        loss_curve[name] = estimator.loss_curve_
        # Absent on some runs, hence the empty-list fallback.
        val_curve[name] = getattr(estimator, 'validation_scores_', [])
        print("done")  # terminate the "Training ... " progress line

    print("Classification performance:")
    print("{0: <39} {1: >10} {2: >11} {3: >12}"
          "".format("Classifier ", "train-time", "test-time", "error-rate"))
    print("-" * 76)
    # Best (lowest error) first.
    for name in sorted(args["classifiers"], key=error.get):
        print("{0: <40} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}"
              "".format(name, train_time[name], test_time[name], error[name]))

    # Persist the curves so the plots can be regenerated without retraining.
    with open('loss_history_mnist.pkl', 'wb') as f:
        pickle.dump(loss_curve, f)
    with open('val_loss_history_mnist.pkl', 'wb') as f:
        pickle.dump(val_curve, f)

    make_plots(loss_curve, val_curve)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
h = .02 # step size in the mesh
# Candidate MLP configurations. Every entry uses the same hidden layers,
# max_iter, alpha, tol and random_state; they differ in the update rule
# (plain SGD / momentum / Nesterov / Adam), the learning-rate schedule and
# whether early stopping is enabled (the ``*_early`` variants).
ESTIMATORS = {
    'SGD_constant_no_momentum': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False),
    'SGD_constant_no_momentum_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False,
        early_stopping=True),
    'SGD_constant_momentum': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False),
    'SGD_constant_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
    'SGD_constant_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        early_stopping=True),
    'SGD_invscaling_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='invscaling'),
    'SGD_invscaling_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='invscaling', early_stopping=True),
    'SGD_adaptive_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='adaptive'),
    'SGD_adaptive_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='adaptive', early_stopping=True),
    'Adam': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1),
    'Adam_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1, early_stopping=True),
}

# Parallel views over the same dict: zip(names, classifiers) pairs each
# label with its estimator.
names = ESTIMATORS.keys()
classifiers = ESTIMATORS.values()
# Two-feature classification problem with one cluster per class, shifted by
# uniform noise drawn from a fixed-seed generator so the data is reproducible.
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

# One (X, y) pair per figure row.
datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable,
]
figure = plt.figure(figsize=(27, 9))

# Colour maps are identical for every panel, so build them once.
# NOTE(review): the right-hand side of ``cm`` was missing in the source;
# a diverging map (RdBu) is assumed for the decision surface -- confirm.
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])

# One row per dataset; first column shows the raw data, then one column per
# classifier.
n_rows = len(datasets)
n_cols = len(classifiers) + 1

i = 1  # running 1-based subplot index
# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

    # Mesh covering the data with a half-unit margin, sampled every ``h``.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    mesh_points = np.c_[xx.ravel(), yy.ravel()]

    # just plot the dataset first
    ax = plt.subplot(n_rows, n_cols, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
               alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    i += 1

    # iterate over classifiers
    for cnt, (name, clf) in enumerate(zip(names, classifiers), 1):
        ax = plt.subplot(n_rows, n_cols, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to
        # each point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(mesh_points)
        else:
            Z = clf.predict_proba(mesh_points)[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        # Every second title is drawn with a higher y offset than the
        # others, staggering adjacent titles.
        if cnt % 2 == 0:
            ax.set_title(name, fontsize=10, y=1.08)
        else:
            ax.set_title(name, fontsize=10)
        # Test-set score in the lower-right corner, without the leading zero.
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
Classification performance:
Classifier train-time test-time Hamming Loss
MLP_SGD_constant_no_momentum 5535.1194s 13.0709s 0.0136
MLP_SGD_adaptive_nesterov_early 1295.6843s 11.7958s 0.0139
MLP_SGD_constant_nesterov_early 860.3760s 11.6086s 0.0139
MLP_Adam_early 570.8964s 11.9987s 0.0141
MLP_SGD_adaptive_nesterov 5878.9259s 12.5683s 0.0144
MLP_Adam 2589.0170s 14.1955s 0.0145
MLP_SGD_constant_momentum 2408.1225s 11.9756s 0.0145
MLP_SGD_constant_nesterov 4265.0237s 11.8534s 0.0145
MLP_SGD_invscaling_nesterov 751.3171s 11.2348s 0.0315
MLP_SGD_constant_no_momentum_early 50.1751s 11.1671s 0.0320
MLP_SGD_invscaling_nesterov_early 70.1446s 11.3409s 0.0320
from __future__ import print_function, division
from time import time
import cPickle as pickle
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_rcv1
from sklearn.metrics import hamming_loss
from sklearn.utils.validation import check_array
from sklearn.neural_network import MLPClassifier
def make_plots(loss, val_loss):
    """Plot loss curves on a 2x3 grid of axes.

    The first panel overlays every estimator whose name does not end in
    ``_early``. Each following panel is dedicated to one ``*_early``
    estimator and shows its loss curve, its ``val_loss`` curve and, when
    available, the loss curve of the same estimator without early stopping.

    Parameters
    ----------
    loss : dict
        Maps estimator name to its loss curve.
    val_loss : dict
        Maps estimator name to its validation curve; must contain every
        ``*_early`` key present in ``loss``.
    """
    suffix = '_early'
    non_early = [name for name in loss if not name.endswith(suffix)]
    early = [name for name in loss if name.endswith(suffix)]

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    panels = axes.ravel()
    make_sub_plot({name: loss[name] for name in non_early}, panels[0])

    # zip() stops at the shorter input, so at most len(panels) - 1
    # early-stopping estimators get a panel.
    for name, ax in zip(early, panels[1:]):
        baseline = name[:-len(suffix)]
        curves = {}
        # The baseline may not have been trained when only a subset of
        # estimators was requested; skip it instead of raising KeyError.
        if baseline in loss:
            curves[baseline] = loss[baseline]
        curves[name] = loss[name]
        curves[name + '_val'] = val_loss[name]
        make_sub_plot(curves, ax)
def make_sub_plot(loss, ax):
    """Draw each curve in ``loss`` on ``ax`` and add a single legend.

    Parameters
    ----------
    loss : dict
        Maps a legend label to the sequence of values to plot.
    ax : object
        Axes-like object; its ``plot``, ``get_lines`` and ``legend``
        methods are called.
    """
    plot_args = [{'c': 'red', 'linestyle': '-'},
                 {'c': 'green', 'linestyle': '-'},
                 {'c': 'blue', 'linestyle': '-'},
                 {'c': 'red', 'linestyle': '--'},
                 {'c': 'green', 'linestyle': '--'},
                 {'c': 'blue', 'linestyle': '--'}]

    # zip() stops at the shorter input: at most len(plot_args) curves are
    # drawn, each with its own colour/line-style combination.
    for (label, loss_curve), args in zip(loss.items(), plot_args):
        ax.plot(loss_curve, label=label, **args)

    # Exactly one legend is created; plots with more than three entries use
    # a different anchor point than smaller ones.
    if len(loss) > 3:
        anchor = (0.95, 1.30)
    else:
        anchor = (1.05, 1.20)
    # Handles and labels are both passed positionally rather than mixing a
    # positional handle list with a ``labels=`` keyword.
    ax.legend(ax.get_lines(), list(loss), loc='center right',
              bbox_to_anchor=anchor, fontsize=11)
# Candidate MLP configurations, keyed by the name accepted on the command
# line. Every entry uses the same hidden layers, max_iter, alpha, tol and
# random_state; they differ in the update rule, the learning-rate schedule
# and whether early stopping is enabled (the ``*_early`` variants).
ESTIMATORS = {
    'MLP_SGD_constant_no_momentum': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False),
    'MLP_SGD_constant_no_momentum_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False,
        early_stopping=True),
    'MLP_SGD_constant_momentum': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
        tol=1e-4, random_state=1, nesterovs_momentum=False),
    'MLP_SGD_constant_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
    'MLP_SGD_constant_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        early_stopping=True),
    'MLP_SGD_invscaling_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='invscaling'),
    'MLP_SGD_invscaling_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='invscaling', early_stopping=True),
    'MLP_SGD_adaptive_nesterov': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='adaptive'),
    'MLP_SGD_adaptive_nesterov_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
        nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
        learning_rate='adaptive', early_stopping=True),
    'MLP_Adam': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1),
    'MLP_Adam_early': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        algorithm='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1, early_stopping=True),
}
# Data
# Script entry point: parse the requested estimator names, load the RCV1
# train/test subsets, fit and time each selected classifier, print a table
# ranked by Hamming loss, pickle the recorded curves and plot them.
# NOTE(review): several statements in this section look truncated (see the
# notes below); restore them from the original script before running.
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# NOTE(review): `ESTIMATORS.keys() + ['all']` needs keys() to return a list,
# which holds on Python 2 only -- confirm the target interpreter.
parser.add_argument('-e', '--estimators', nargs="+", required=True,
choices=ESTIMATORS.keys() + ['all'])
args = vars(parser.parse_args())
data_train = fetch_rcv1(subset="train", shuffle=True, random_state=1)
data_test = fetch_rcv1(subset="test", shuffle=True, random_state=1)
# NOTE(review): the first argument of both check_array calls is missing and
# the X_train call is never closed; presumably the feature matrices of
# data_train / data_test -- confirm.
X_train = check_array(, dtype=np.float32,
X_test = check_array(, dtype=np.float32, accept_sparse="csr")
# NOTE(review): right-hand sides are missing; presumably the target arrays
# of data_train / data_test -- confirm.
y_train =
y_test =
# Summary of the loaded data (shape, sparse format, dtype, density).
print("X_train.shape = {0}".format(X_train.shape))
print("X_train.format = {0}".format(X_train.format))
print("X_train.dtype = {0}".format(X_train.dtype))
print("X_train density = {0}"
"".format(X_train.nnz / np.product(X_train.shape)))
print("y_train {0}".format(y_train.shape))
print("X_test {0}".format(X_test.shape))
print("X_test.format = {0}".format(X_test.format))
print("X_test.dtype = {0}".format(X_test.dtype))
print("y_test {0}".format(y_test.shape))
print("Classifier Training")
# 'all' anywhere in the selection expands to every configured estimator.
if 'all' in args['estimators']:
args['estimators'] = ESTIMATORS.keys()
# Per-estimator results, all keyed by estimator name.
hmg_loss, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
for name in sorted(args["estimators"]):
clf = ESTIMATORS[name]
# NOTE(review): this `except` has no visible `try` or handler body; the
# guarded statement appears to be missing.
except (TypeError, ValueError):
print("Training %s ... " % name, end="")
# NOTE(review): the next line looks truncated; presumably `t0 = time()`
# followed by a fit call on (X_train, y_train) -- confirm.
t0 = time(), y_train)
train_time[name] = time() - t0
t0 = time()
y_pred = clf.predict(X_test)
test_time[name] = time() - t0
hmg_loss[name] = hamming_loss(y_test, y_pred)
loss_curve[name] = clf.loss_curve_
# Falls back to an empty list when the estimator has no
# `validation_scores_` attribute.
val_curve[name] = getattr(clf, 'validation_scores_', [])
print("Classification performance:")
print("%s %s %s %s" % ("Classifier ", "train-time", "test-time",
"Hamming Loss"))
print("-" * 67)
# Rows are ordered by ascending Hamming loss.
for name in sorted(hmg_loss, key=hmg_loss.get):
print("%s %s %s %s" % (name.ljust(36),
("%.4fs" % train_time[name]).center(10),
("%.4fs" % test_time[name]).center(10),
("%.4f" % hmg_loss[name]).center(10)))
# Persist the curves before plotting.
with open('loss_history_rcv1.pkl', 'wb') as f:
pickle.dump(loss_curve, f)
with open('val_loss_history_rcv1.pkl', 'wb') as f:
pickle.dump(val_curve, f)
make_plots(loss_curve, val_curve)
mean std
Sparse: 0.0586632259687 0.00355379776739
np.dot: 0.0561765789986 0.00206648457631
dummy: 0.0553110162417 0.00247213620297
from __future__ import print_function
import sys
from scipy.sparse import issparse
from sklearn.utils.extmath import safe_sparse_dot
import numpy as np
import time
def dummy_dot(a, b):
    """Return ``np.dot(a, b)`` after rejecting sparse operands.

    Parameters
    ----------
    a, b : array-like
        Dense operands of the product.

    Returns
    -------
    The result of ``np.dot(a, b)``.

    Raises
    ------
    ValueError
        If either operand is a scipy sparse matrix.
    """
    if issparse(a) or issparse(b):
        raise ValueError("dummy_dot only accepts dense operands")
    return np.dot(a, b)
def compare():
    """Time three dot-product implementations on the same random inputs.

    Ten (1000 x 10000 matrix, length-10000 vector) pairs are generated once
    and then multiplied with ``safe_sparse_dot``, ``np.dot`` and
    ``dummy_dot`` in turn.

    Returns
    -------
    tuple of float
        ``(elapsed_sparse, elapsed_npdot, elapsed_dummy)``: wall-clock
        seconds spent on all ten products by each implementation.
    """
    # Inputs are created before any timer starts so generation cost is not
    # part of the measurement.
    tests = [(np.random.rand(1000, 10000), np.random.rand(10000))
             for _ in range(10)]

    start = time.time()
    for a, b in tests:
        safe_sparse_dot(a, b)
    elapsed_sparse = time.time() - start

    start = time.time()
    for a, b in tests:
        np.dot(a, b)
    elapsed_npdot = time.time() - start

    start = time.time()
    for a, b in tests:
        dummy_dot(a, b)
    elapsed_dummy = time.time() - start

    return elapsed_sparse, elapsed_npdot, elapsed_dummy
def main():
    """Run ``compare`` repeatedly and print mean/std of each timing."""
    times = []
    n = 300
    for i in range(n):
        # Collect one (sparse, np.dot, dummy) timing triple per round;
        # without this the summary below has nothing to aggregate.
        times.append(compare())
        sys.stdout.write('\rFinished {} out of {}'.format(i+1, n))
        # '\r' progress lines contain no newline, so flush explicitly.
        sys.stdout.flush()

    # Transpose the list of triples into one array per implementation.
    times_sparse, times_npdot, times_dummy = map(np.array, zip(*times))
    avg_sparse, std_sparse = times_sparse.mean(), times_sparse.std()
    avg_npdot, std_npdot = times_npdot.mean(), times_npdot.std()
    avg_dummy, std_dummy = times_dummy.mean(), times_dummy.std()

    print(" mean std")
    print("Sparse:", avg_sparse, std_sparse)
    print("np.dot:", avg_npdot, std_npdot)
    print("dummy: ", avg_dummy, std_dummy)
if __name__ == '__main__':
