Skip to content

Instantly share code, notes, and snippets.

@mtzl
Created July 12, 2016 08:53
Show Gist options
  • Save mtzl/b220f7bd9e2433fc0701f3df3ac16988 to your computer and use it in GitHub Desktop.
Save mtzl/b220f7bd9e2433fc0701f3df3ac16988 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
==============================================================
Grid Search over multiple Pipeline Configurations & Estimators
==============================================================
This example constructs a pipeline that does an optional prescaling step,
then dimensionality reduction followed by prediction with a decision tree
ensemble. It demonstrates the use of GridSearchCV and Pipeline to optimize
over different classes of estimators in a single CV run -- Gradient Boosted
Decision Trees are compared to Random Forests during the grid search.
"""
# Authors: Robert McGibbon, Joel Nothman, Moritz Lotze
from __future__ import print_function, division
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif
# Fixed seed for the tree ensembles so that the printed scores are
# reproducible from one run to the next.
RANDOM_STATE = 0

# Candidate output sizes for the dimensionality-reduction step; used both as
# PCA's ``n_components`` and as SelectKBest's ``k``.
N_FEATURES_OPTIONS = [2, 4]

# Template pipeline. Each named step is a placeholder that the parameter
# grid below may swap for another estimator (or, for 'prescale', for None).
pipe = Pipeline([
    ('prescale', RobustScaler()),
    ('reduce_dim', PCA()),
    ('classify', RandomForestClassifier()),
])


def _classifier_options():
    """Return fresh, seeded instances of the two classifiers being compared."""
    return [
        RandomForestClassifier(random_state=RANDOM_STATE),
        GradientBoostingClassifier(random_state=RANDOM_STATE),
    ]


# Two sub-grids are required because the parameter that controls the number
# of output features has a different name for each reducer
# (``n_components`` for PCA, ``k`` for SelectKBest).
param_grid = [
    {
        # Prescaling is optional here: try with the scaler and without it.
        'prescale': [RobustScaler(), None],
        'reduce_dim': [PCA(iterated_power=7)],
        'reduce_dim__n_components': N_FEATURES_OPTIONS,
        'classify': _classifier_options(),
    },
    {
        # 'prescale' is not listed, so the pipeline's own RobustScaler step
        # is kept for every candidate of this sub-grid.
        'reduce_dim': [SelectKBest(f_classif)],
        'reduce_dim__k': N_FEATURES_OPTIONS,
        'classify': _classifier_options(),
    },
]


def main(n_jobs=8):
    """Run the grid search on the iris data and print the results.

    Parameters
    ----------
    n_jobs : int, default 8
        Number of parallel jobs handed to ``GridSearchCV``.

    Returns
    -------
    grid : GridSearchCV
        The search object fitted on the full iris data set.
    accs : float
        Accuracy of the cross-validated predictions of the whole search
        procedure.
    """
    print(__doc__)

    grid = GridSearchCV(pipe, cv=3, n_jobs=n_jobs, param_grid=param_grid)
    iris = load_iris()

    # Fit on the full data set to find out which configuration wins.
    grid.fit(iris.data, iris.target)
    print("Best parameters: %r" % (grid.best_params_,))
    print("Best CV score: %0.3f" % grid.best_score_)

    # cross_val_predict works on clones of ``grid``, so the whole search is
    # repeated inside every outer fold (nested cross-validation). The
    # resulting accuracy estimates the search procedure itself rather than
    # one particular fitted model.
    preds = cross_val_predict(grid, iris.data, iris.target)
    # accuracy_score expects (y_true, y_pred), in that order.
    accs = accuracy_score(iris.target, preds)
    print(accs)
    return grid, accs


# The guard keeps the search from being re-executed when worker processes
# started for ``n_jobs > 1`` re-import this module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment