Skip to content

Instantly share code, notes, and snippets.

@lorransr
Last active October 7, 2022 20:51
Show Gist options
  • Save lorransr/635df9f2e899ddd42fb9454afc4f245e to your computer and use it in GitHub Desktop.
Save lorransr/635df9f2e899ddd42fb9454afc4f245e to your computer and use it in GitHub Desktop.
Simple training of multiple ML models with scikit-learn
#Mostly copied from: https://towardsdatascience.com/quickly-test-multiple-models-a98477476f0
from typing import List, Tuple
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn import model_selection
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Classifiers to benchmark, each paired with the short label that identifies
# it in the printed reports and in the 'model' column of the results table.
# Every estimator is created with its constructor defaults.
MODELS = [
    ('LogReg', LogisticRegression()),
    ('RF', RandomForestClassifier()),
    ('KNN', KNeighborsClassifier()),
    ('SVM', SVC()),
    ('GNB', GaussianNB()),
    ('XGB', XGBClassifier()),
]

# Scorer names handed to cross_validate; each one is evaluated on every fold.
SCORING = [
    'accuracy',
    'precision_weighted',
    'recall_weighted',
    'f1_weighted',
    'roc_auc',
]

# Display names for the target classes, passed to classification_report.
TARGET_NAMES = ['malignant', 'benign']
def run_exps(X_train: pd.DataFrame, y_train: pd.DataFrame, X_test: pd.DataFrame,
             y_test: pd.DataFrame, models: List[Tuple], scoring: List, target_names: List
             ) -> pd.DataFrame:
    '''
    Cross-validate several classifiers and collect their per-fold results.

    For every ``(name, model)`` pair the model is scored with 5-fold
    cross-validation on the training split, then refit on the whole training
    split and used to predict the test split; the model name and a
    classification report for those predictions are printed.

    :param X_train: training split
    :param y_train: training target vector
    :param X_test: test split
    :param y_test: test target vector
    :param models: list of ``(name, estimator)`` pairs to evaluate; the
        estimators are fitted in place
    :param scoring: scorer names forwarded to ``cross_validate``
    :param target_names: class display names forwarded to
        ``classification_report``
    :return: DataFrame of cross-validation results, one row per model and
        fold, with the columns produced by ``cross_validate`` plus a
        ``model`` column holding the model name
    :raises ValueError: if ``models`` is empty
    '''
    if not models:
        # pd.concat([]) would raise the same exception type, but with the
        # unhelpful message "No objects to concatenate".
        raise ValueError('run_exps needs at least one (name, model) pair in `models`')

    # Seeded splitter: built once, it yields the same folds for every model,
    # so the per-fold scores are comparable across models.
    kfold = model_selection.KFold(n_splits=5, shuffle=True, random_state=90210)

    fold_frames = []
    for name, model in models:
        cv_results = model_selection.cross_validate(
            model, X_train, y_train, cv=kfold, scoring=scoring
        )

        # Refit on the whole training split for the held-out test report.
        clf = model.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print(name)
        print(classification_report(y_test, y_pred, target_names=target_names))

        this_df = pd.DataFrame(cv_results)
        this_df['model'] = name
        fold_frames.append(this_df)

    return pd.concat(fold_frames, ignore_index=True)
# Load the breast-cancer dataset as pandas objects and hold out 25% of the
# rows for the per-model classification reports.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
final = run_exps(X_train, y_train, X_test, y_test, MODELS, SCORING, TARGET_NAMES)

# Bootstrap: draw 30 rows with replacement from each model's per-fold results.
# unique() keeps first-seen order (a set would not), and the fixed seed makes
# the resampling repeatable like the other seeded steps in this script.
bootstraps = [
    final.loc[final['model'] == model_name].sample(n=30, replace=True, random_state=42)
    for model_name in final['model'].unique()
]
bootstrap_df = pd.concat(bootstraps, ignore_index=True)

# Long format: one row per (model, metric, value) for plotting.
results_long = pd.melt(bootstrap_df, id_vars=['model'], var_name='metrics', value_name='values')
time_metrics = ['fit_time', 'score_time']  # timing columns, not quality scores

## PERFORMANCE METRICS
is_time_metric = results_long['metrics'].isin(time_metrics)
results_long_nofit = results_long.loc[~is_time_metric]  # scores only
results_long_nofit = results_long_nofit.sort_values(by='values')

## TIME METRICS
results_long_fit = results_long.loc[is_time_metric]  # timings only
results_long_fit = results_long_fit.sort_values(by='values')

# Compare plot
plt.figure(figsize=(20, 12))
sns.set(font_scale=2.5)
g = sns.boxplot(x="model", y="values", hue="metrics", data=results_long_nofit, palette="Set3")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title('Comparison of Model by Classification Metric')
# The legend sits outside the axes; a tight bounding box keeps it in the file.
plt.savefig('./benchmark_models_performance.png', dpi=300, bbox_inches='tight')

# Standard deviation and mean of every score, per model.
# Columns are taken in table order so the output layout is deterministic.
metrics = [
    column for column in bootstrap_df.columns
    if column != 'model' and column not in time_metrics
]
performance_summary = bootstrap_df.groupby(['model'])[metrics].agg(['std', 'mean'])
print(performance_summary)

# Standard deviation and mean of the timing columns, per model.
time_summary = bootstrap_df.groupby(['model'])[time_metrics].agg(['std', 'mean'])
print(time_summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment