Gist by @mikebenfield, created March 30, 2017.
import sys
import time

import numpy as np
from sklearn import datasets, ensemble, metrics, model_selection

# Number of trees in the forest, taken from the command line.
n_estimators = int(sys.argv[1])

rs = np.random.RandomState(12345)

# Synthetic binary classification problem: 10,000 samples, 12 informative features.
X, y = datasets.make_classification(n_samples=10000, n_features=12,
                                    n_informative=12, n_redundant=0,
                                    n_repeated=0, random_state=rs)
X = X.astype(np.float32)

# Hold out 80% of the data as a test set; train on the remaining 20%.
X_train, X_test, y_train, y_test = \
    model_selection.train_test_split(X, y, test_size=0.8, random_state=rs)

rfc = ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                      n_jobs=-1, random_state=rs)

# Time fitting and prediction separately.
time1 = time.perf_counter()
rfc.fit(X_train, y_train)
time2 = time.perf_counter()
proba = rfc.predict_proba(X_test)
time3 = time.perf_counter()

print("{:5.3f} sec to fit".format(time2 - time1))
print("{:5.3f} sec to predict".format(time3 - time2))
print("{:5.3f} brier score".format(metrics.brier_score_loss(y_test, proba[:, 1])))