Skip to content

Instantly share code, notes, and snippets.

@tobigue
Created July 27, 2012 15:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save tobigue/3188762 to your computer and use it in GitHub Desktop.
Save tobigue/3188762 to your computer and use it in GitHub Desktop.
Sklearn GridSearchCV vs. CrossValidation
from sklearn.linear_model import SGDClassifier
from sklearn import cross_validation
from sklearn import metrics
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import load_iris
# Compare the best score reported by GridSearchCV with the score obtained by
# running plain cross-validation on a classifier built from the winning
# parameters. Both evaluations are given the same fold object (`cv`) and the
# same scoring function (`score_func`).

# Iris data set: feature matrix and class labels.
data = load_iris()
sample_vector = data.data
targets = data.target

# Stratified folds built from the labels; 10 is passed as the fold count.
cv = cross_validation.StratifiedKFold(targets, 10)
score_func = metrics.f1_score

# Parameter grid handed to GridSearchCV. 'seed' lists a single value, so it
# is effectively held fixed while loss / penalty / alpha are varied.
parameters = {
    'seed': [0],
    'loss': ('log', 'hinge'),
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [0.001, 0.0001, 0.00001, 0.000001],
}

# Single-argument print(...) calls produce the same output under the Python 2
# print statement and the Python 3 print function.
print("")
print("GRID SEARCH:")
grid_search = GridSearchCV(SGDClassifier(), parameters,
                           score_func=score_func, cv=cv)
grid_search.fit(sample_vector, targets)
print("Best %s: %0.3f" % (score_func.__name__, grid_search.best_score_))
print("Best parameters set:")
# get_params() returns every parameter of the best estimator, not only the
# ones that were searched; only the searched ones are printed here.
best_parameters = grid_search.best_estimator_.get_params()
for param_name in sorted(parameters):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

print("")
print("CROSS VALIDATION:")
# Rebuild a fresh classifier from the full parameter set of the best
# estimator and score it on the same folds with the same metric.
clf = SGDClassifier(**best_parameters)
scores = cross_validation.cross_val_score(clf, sample_vector, targets,
                                          cv=cv, score_func=score_func)
# NOTE(review): the reported spread is half a standard deviation of the
# per-fold scores; confirm this is the intended "+/-" width.
print('Best %s: %0.3f (+/- %0.2f)' % (
    score_func.__name__, scores.mean(), scores.std() / 2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment