Created
February 11, 2020 19:54
-
-
Save kbfreder/adccbfc0e0f73af672014c90f60083a7 to your computer and use it in GitHub Desktop.
Comparing classifiers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, BaggingClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

# this is a custom module
import assess_clf_models as acm
# Define a preprocessing pipeline.
# See https://github.com/kbfreder/small-molecules/blob/master/02c-Pipeline_ModelEval_Imbalance.ipynb
# for an example.
#
# The pipeline has a single step, 'all': a FeatureUnion combining five
# transformers. The preproc_* objects are not defined in this snippet and must
# already exist in the session (presumably built as in the notebook linked
# above -- TODO confirm).
pipe_with_scale = Pipeline([
    ('all', FeatureUnion([
        ('cvs', preproc_cv),
        ('feat_eng', preproc_feat_eng_scaled),
        ('ohe', preproc_ohe),
        ('pass', preproc_pass),
        ('num', preproc_scale),
    ])),
])
# Classifiers to compare.
log_reg = LogisticRegression(solver='liblinear')
rand_for = RandomForestClassifier(n_estimators=10)
grad_boost = GradientBoostingClassifier()
# cb_cat_cols is not defined in this snippet; it must already exist in the
# session (presumably the categorical column names/indices -- TODO confirm).
cb = CatBoostClassifier(cat_features=cb_cat_cols, logging_level='Silent')

# Assess every classifier on the same (mini_X, mini_y) data, each paired with
# a preprocessing pipeline. pipe_cat_only and pipe_cat_boost, like mini_X and
# mini_y, are defined outside this snippet.
mini_results_lr = acm.assess_model(pipe_with_scale, log_reg, mini_X, mini_y)
mini_results_rf = acm.assess_model(pipe_cat_only, rand_for, mini_X, mini_y)
mini_results_gb = acm.assess_model(pipe_cat_only, grad_boost, mini_X, mini_y)
mini_results_cb = acm.assess_model(pipe_cat_boost, cb, mini_X, mini_y)

# One row per classifier, labelled by the index below. The columns are
# whatever acm.assess_model returns (presumably a mapping of metric name to
# score -- verify against the custom module).
comp_df = pd.DataFrame(
    [mini_results_lr, mini_results_rf, mini_results_gb, mini_results_cb],
    index=['LogReg', 'RandFor', 'GradBst', 'Cat Boost'],
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment