Skip to content

Instantly share code, notes, and snippets.

@jsun
Created September 17, 2018 07:13
Show Gist options
  • Save jsun/236dd404b7a92ba75d0688fc3fec3868 to your computer and use it in GitHub Desktop.
Save jsun/236dd404b7a92ba75d0688fc3fec3868 to your computer and use it in GitHub Desktop.
# Tune an SVM classifier on the breast-cancer dataset.
#
# Workflow: hold out 20% of the samples for testing, then grid-search the SVM
# kernel / C / gamma with 10-fold cross-validation over a
# scale -> PCA -> SVC pipeline, and finally score the refitted best pipeline
# on the held-out samples.
#
# Lines starting with "##" are example output captured from one run.
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
# GridSearchCV must come from sklearn.model_selection: the legacy
# sklearn.grid_search module was deprecated in scikit-learn 0.18 and removed
# in 0.20, so importing from it fails on current releases.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# load data
cancer = datasets.load_breast_cancer()
x = cancer.data
y = cancer.target
print(x.shape)
## (569, 30)

# NOTE: no random_state is given, so the split (and therefore every score
# printed below) changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x_train.shape)
## (455, 30)

# model learning pipeline: standardize the features, keep enough principal
# components to explain 80% of the variance, then classify with an SVM
ppln = Pipeline([
    ('scale', StandardScaler()),
    ('pca', PCA(0.80)),
    ('clf', SVC()),
])

# set parameter ranges for grid search
# 20 log-spaced values from 1e-5 to 1e5, shared by C and gamma
log_grid = 10 ** np.linspace(-5, 5, 20)
param_grid = [
    {
        # the linear kernel does not use gamma, so only C is searched
        'clf__kernel': ['linear'],
        'clf__C': log_grid,
    },
    {
        'clf__kernel': ['rbf'],
        'clf__C': log_grid,
        'clf__gamma': log_grid,
    },
    {
        'clf__kernel': ['sigmoid'],
        'clf__C': log_grid,
        'clf__gamma': log_grid,
    },
]

# perform grid search with 10-fold cross validation
# (20 + 400 + 400 = 820 candidates, each fitted on 10 folds, run serially)
gs = GridSearchCV(estimator=ppln, param_grid=param_grid, scoring='f1', cv=10, n_jobs=1)
gs = gs.fit(x_train, y_train)

# best mean cross-validated F1 score and the parameters that achieved it
print(gs.best_score_)
## 0.9827698973640098
print(gs.best_params_)
## {'clf__C': 29763.51441631313, 'clf__gamma': 0.000379269019073225, 'clf__kernel': 'rbf'}
print(gs.best_estimator_)
## Pipeline(memory=None,
## steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=0.8, random_state=None,
## svd_solver='auto', tol=0.0, whiten=False)), ('clf', SVC(C=29763.51441631313, cache_size=200, class_weight=None, coef0=0.0,
## decision_function_shape='ovr', degree=3, gamma=0.000379269019073225,
## kernel='rbf', max_iter=-1, probability=False, random_state=None,
## shrinking=True, tol=0.001, verbose=False))])

# get the best prediction model (refitted on the full training split)
clf = gs.best_estimator_
# NOTE: Pipeline.score delegates to SVC.score, which reports mean accuracy,
# so this number is not directly comparable to the F1-based best_score_ above.
print(clf.score(x_test, y_test))
## 0.9473684210526315
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment