Skip to content

Instantly share code, notes, and snippets.

@izahn
Created June 3, 2021 18:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save izahn/de4964fa75d3a844780cdc0a35cea04f to your computer and use it in GitHub Desktop.
Save izahn/de4964fa75d3a844780cdc0a35cea04f to your computer and use it in GitHub Desktop.
## Adapted from https://www.featureranking.com/tutorials/machine-learning-tutorials/sk-part-5-advanced-topics-pipelines-statistical-model-comparison-and-model-deployment/
## by Ista
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
# Load the breast-cancer dataset bundled with scikit-learn and split it into
# the raw feature matrix and the class labels.
cancer_df = load_breast_cancer()
Data_unscaled, target = cancer_df.data, cancer_df.target

# Globally scaled copy of the features.  These two names are kept so that any
# code relying on them keeps working, but they are deliberately NOT used for
# the grid search below: a scaler fit on the full dataset has already seen
# the held-out folds, which leaks information into cross-validation.
data_scaler = preprocessing.MinMaxScaler().fit(Data_unscaled)
Data = data_scaler.transform(Data_unscaled)

# 5-fold stratified CV repeated twice (10 train/test splits per candidate);
# the fixed seed makes the splits reproducible.
cv_method = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=2,
    random_state=999,
)

# Scaling is the first pipeline step so that, inside every CV split, the
# min/max are learned from the training portion only and then applied to the
# held-out portion.
pipe_KNN = Pipeline([
    ('scaler', preprocessing.MinMaxScaler()),
    ('fselector', SelectKBest()),
    ('knn', KNeighborsClassifier()),
])

# Search space: 2 scoring functions x 3 feature counts x 5 neighbour counts
# x 2 Minkowski powers = 60 candidates.  The last `k` equals the number of
# columns, i.e. "keep every feature".
params_pipe_KNN = {
    'fselector__score_func': [f_classif, mutual_info_classif],
    'fselector__k': [10, 20, Data_unscaled.shape[1]],
    'knn__n_neighbors': [1, 2, 3, 4, 5],
    'knn__p': [1, 2],
}

gs_pipe_KNN = GridSearchCV(
    estimator=pipe_KNN,
    param_grid=params_pipe_KNN,
    cv=cv_method,
    n_jobs=16,  # number of parallel workers requested for the search
    scoring='roc_auc',
    verbose=1,
)

# Fit on the raw features; the pipeline's own scaler handles normalisation
# separately within each split.
gs_pipe_KNN.fit(Data_unscaled, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment