Gist by @mikebenfield, created March 30, 2017.
import sys
import time

import numpy as np
from sklearn import datasets, ensemble, metrics, model_selection

# Number of trees in the forest, taken from the command line.
n_estimators = int(sys.argv[1])

rs = np.random.RandomState(12345)

# Synthetic binary classification problem: 10,000 samples, 12 informative features.
X, y = datasets.make_classification(n_samples=10000, n_features=12,
                                    n_informative=12, n_redundant=0,
                                    n_repeated=0, random_state=rs)
X = X.astype(np.float32)

# Hold out 80% of the data as a test set; train on the remaining 20%.
X_train, X_test, y_train, y_test = \
    model_selection.train_test_split(X, y, test_size=0.8, random_state=rs)

rfc = ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                      n_jobs=-1, random_state=rs)

# Time fitting and prediction separately.
time1 = time.perf_counter()
rfc.fit(X_train, y_train)
time2 = time.perf_counter()
proba = rfc.predict_proba(X_test)
time3 = time.perf_counter()

print("{:5.3f} sec to fit".format(time2 - time1))
print("{:5.3f} sec to predict".format(time3 - time2))
print("{:5.3f} brier score".format(metrics.brier_score_loss(y_test, proba[:, 1])))