Skip to content

Instantly share code, notes, and snippets.

@robertmaxwilliams
Created June 6, 2018 18:30
Show Gist options
  • Save robertmaxwilliams/f685424775dc86c92a656a4c5f3774d8 to your computer and use it in GitHub Desktop.
Save robertmaxwilliams/f685424775dc86c92a656a4c5f3774d8 to your computer and use it in GitHub Desktop.

How to tell whether an sklearn estimator is a classifier or a regressor

from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
dir(DecisionTreeClassifier)
['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_estimator_type',
 '_get_param_names',
 '_validate_X_predict',
 'apply',
 'decision_path',
 'feature_importances_',
 'fit',
 'get_params',
 'predict',
 'predict_log_proba',
 'predict_proba',
 'score',
 'set_params']
set(dir(DecisionTreeClassifier)) - set(dir(DecisionTreeRegressor))
{'predict_log_proba', 'predict_proba'}
set(dir(DecisionTreeRegressor)) - set(dir(DecisionTreeClassifier))
set()
# Estimator classes grouped by task: classifiers in `classy`, regressors in `reg`.
classy = [
    ExtraTreesClassifier,
    RandomForestClassifier,
    AdaBoostClassifier,
    MLPClassifier,
    LogisticRegression,
    SVC,
    DecisionTreeClassifier,
]
reg = [
    RandomForestRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor,
    GaussianProcessRegressor,
    KernelRidge,
    MLPRegressor,
    SVR,
    DecisionTreeRegressor,
    LinearRegression,
    Lasso,
]

from functools import reduce

# Attribute names that every class in a group exposes: fold the per-class
# dir() sets together with set intersection.
class_things = reduce(set.intersection, (set(dir(est)) for est in classy))

reg_things = reduce(set.intersection, (set(dir(est)) for est in reg))
class_things
{'__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_estimator_type',
 '_get_param_names',
 'fit',
 'get_params',
 'predict',
 'predict_log_proba',
 'predict_proba',
 'score',
 'set_params'}
reg_things
{'__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_estimator_type',
 '_get_param_names',
 'fit',
 'get_params',
 'predict',
 'score',
 'set_params'}
class_things - reg_things
{'predict_log_proba', 'predict_proba'}
reg_things - class_things
set()
def class_count(module):
    """Count occurrences of the substring 'class' in an object's docstring.

    Matching is case-insensitive and substring-based, so 'Classifier',
    'classes' and 'class' each contribute one hit per occurrence.

    Parameters
    ----------
    module : object
        Any object with a ``__doc__`` attribute. Despite the parameter
        name, this notebook passes estimator classes.

    Returns
    -------
    int
        Number of hits; 0 when the object has no docstring.
    """
    # An undocumented class stores ``__doc__ = None``; fall back to an empty
    # string instead of failing on ``None.lower()``.
    doc = getattr(module, '__doc__', None) or ''
    return doc.lower().count('class')
def reg_count(module):
    """Count occurrences of the substring 'regress' in an object's docstring.

    Matching is case-insensitive and substring-based, so 'Regressor',
    'regression' and 'regress' each contribute one hit per occurrence.

    Parameters
    ----------
    module : object
        Any object with a ``__doc__`` attribute. Despite the parameter
        name, this notebook passes estimator classes.

    Returns
    -------
    int
        Number of hits; 0 when the object has no docstring.
    """
    # An undocumented class stores ``__doc__ = None``; fall back to an empty
    # string instead of failing on ``None.lower()``.
    doc = getattr(module, '__doc__', None) or ''
    return doc.lower().count('regress')

def is_classifier(module):
    """Guess from docstring wording whether an estimator is a classifier.

    Rough heuristic: compares class_count(module) (hits for 'class') with
    reg_count(module) (hits for 'regress').

    Returns
    -------
    True
        Only 'class' occurs, or it leads 'regress' by at least 3 hits.
    False
        Only 'regress' occurs, or it leads 'class' by at least 3 hits.
    'unsure'
        Otherwise (neither word occurs, or both occur with a lead below 3).
        Note that this string is truthy, so compare the result with
        ``is True`` / ``is False`` rather than using it directly in an ``if``.
    """
    n_class = class_count(module)
    n_reg = reg_count(module)
    margin = n_class - n_reg

    if n_reg == 0 and n_class > 0:
        # Only classification wording is present.
        verdict = True
    elif n_class == 0 and n_reg > 0:
        # Only regression wording is present.
        verdict = False
    elif margin >= 3:
        verdict = True
    elif margin <= -3:
        verdict = False
    else:
        verdict = 'unsure'
    return verdict

def is_classifier_dir(module):
    """Report whether 'predict_proba' is among the names in dir(module).

    Among the estimators compared in this notebook, every classifier lists
    that attribute and no regressor does, so its presence is used as the
    classifier test. Returns a bool.
    """
    return any(name == 'predict_proba' for name in dir(module))

class_count(RandomForestClassifier), reg_count(RandomForestClassifier)

is_classifier(RandomForestClassifier)
True
# One tab-separated row per estimator: docstring verdict, dir()-based
# verdict, 'class' hit count, 'regress' hit count, class name.
for x in classy + reg:
    row = (
        is_classifier(x),
        is_classifier_dir(x),
        class_count(x),
        reg_count(x),
        x.__name__,
    )
    print(*row, sep='\t')
True	True	24	0	ExtraTreesClassifier
True	True	28	0	RandomForestClassifier
True	True	22	1	AdaBoostClassifier
True	True	7	0	MLPClassifier
True	True	25	5	LogisticRegression
True	True	26	1	SVC
True	True	20	2	DecisionTreeClassifier
False	False	1	9	RandomForestRegressor
False	False	1	5	ExtraTreesRegressor
False	False	3	11	AdaBoostRegressor
False	False	0	2	GaussianProcessRegressor
False	False	0	9	KernelRidge
unsure	False	2	2	MLPRegressor
False	False	0	4	SVR
False	False	2	6	DecisionTreeRegressor
False	False	1	4	LinearRegression
unsure	False	3	4	Lasso
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment