# Gist gatapia/11360437 by @gatapia, created April 28, 2014.
# (GitHub page chrome captured along with the gist was reduced to this header.)
import sklearn as sk
import numpy as np
import scipy as scipy
import cPickle as pickle
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import ShuffleSplit
from scipy.stats import sem
# Models
from sklearn.ensemble import RandomForestClassifier
def mean_score(scores):
    """Format the mean and standard error of a collection of scores.

    Args:
        scores: Sequence or array of numeric scores, e.g. the per-split
            results returned by ``cross_val_score``.

    Returns:
        A string such as ``"Mean: 0.600 (+/-0.100)"``: ``np.mean(scores)``
        followed by ``scipy.stats.sem(scores)``, both with three decimals.
    """
    return "Mean: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores))
def do_cv(model, n_samples=1000, n_iter=3, test_size=0.1):
    """Cross-validate ``model`` on random shuffle splits and print the result.

    Reads the module-level names ``X_train``, ``y_train`` and ``seed``; they
    must be bound before this function is called.

    Args:
        model: Estimator handed to ``cross_val_score``.
        n_samples: First argument to ``ShuffleSplit``. In the legacy
            ``sklearn.cross_validation`` API this is the number of samples
            the split indices are drawn from, so it should not exceed
            ``len(X_train)``.
        n_iter: Number of shuffle/split iterations.
        test_size: Forwarded to ``ShuffleSplit`` as ``test_size``.

    Returns:
        None. The formatted mean score (see ``mean_score``) is printed.
    """
    cv = ShuffleSplit(
        n_samples, n_iter=n_iter, test_size=test_size, random_state=seed
    )
    test_scores = cross_val_score(model, X_train, y_train, cv=cv)
    print(mean_score(test_scores))
# Random seed shared by the CV splitter (read inside do_cv) and the classifier.
seed = 0

# NOTE: unpickling can execute arbitrary code; only load "all_data.p" when it
# comes from a trusted source.
with open("all_data.p", "rb") as f:
    data = pickle.load(f)

# Bind the names the status message below refers to; the original printed
# len(train_munged) / len(test_munged) without ever defining them.
train_munged = data['train_munged']
test_munged = data['test_munged']
y = data['y']

# do_cv reads X_train / y_train as module-level globals.
X_train = train_munged
# NOTE(review): the original assigned data['test_munged'] (the test feature
# set) to y_train and left the loaded `y` unused. Cross-validation needs the
# labels of the training rows, presumably data['y'] -- confirm against the
# code that produced all_data.p.
y_train = y

print("Loaded train[{0}] test[{1}]".format(len(train_munged), len(test_munged)))

classifier = RandomForestClassifier(
    450, max_features=36, random_state=seed, n_jobs=1
)
do_cv(classifier, n_samples=5000, n_iter=10)
# End of gist (GitHub sign-in / comment prompt omitted).