Skip to content

Instantly share code, notes, and snippets.

@robertmaxwilliams
Last active June 6, 2018 18:47
Show Gist options
  • Save robertmaxwilliams/4bcfdfe347488a9fe57b69c8de6d99f7 to your computer and use it in GitHub Desktop.
Save robertmaxwilliams/4bcfdfe347488a9fe57b69c8de6d99f7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier\n",
"from sklearn.gaussian_process import GaussianProcessRegressor\n",
"from sklearn.kernel_ridge import KernelRidge\n",
"from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression\n",
"from sklearn.neural_network import MLPRegressor, MLPClassifier\n",
"from sklearn.svm import SVC, SVR\n",
"from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, ExtraTreesClassifier, RandomForestClassifier, AdaBoostClassifier\n",
"from sklearn.gaussian_process import GaussianProcessRegressor\n",
"from sklearn.kernel_ridge import KernelRidge\n",
"from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression\n",
"from sklearn.neural_network import MLPRegressor, MLPClassifier\n",
"from sklearn.svm import SVC, SVR\n",
"from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['__abstractmethods__',\n",
" '__class__',\n",
" '__delattr__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattribute__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__le__',\n",
" '__lt__',\n",
" '__module__',\n",
" '__ne__',\n",
" '__new__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__setattr__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__subclasshook__',\n",
" '__weakref__',\n",
" '_abc_cache',\n",
" '_abc_negative_cache',\n",
" '_abc_negative_cache_version',\n",
" '_abc_registry',\n",
" '_estimator_type',\n",
" '_get_param_names',\n",
" '_validate_X_predict',\n",
" 'apply',\n",
" 'decision_path',\n",
" 'feature_importances_',\n",
" 'fit',\n",
" 'get_params',\n",
" 'predict',\n",
" 'predict_log_proba',\n",
" 'predict_proba',\n",
" 'score',\n",
" 'set_params']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(DecisionTreeClassifier)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'predict_log_proba', 'predict_proba'}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(dir(DecisionTreeClassifier)) - set(dir(DecisionTreeRegressor))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"set()"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(dir(DecisionTreeRegressor)) - set(dir(DecisionTreeClassifier))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from functools import reduce\n",
"\n",
"# Estimator classes whose default-constructed instances are compared below.\n",
"classy = [\n",
"    ExtraTreesClassifier,\n",
"    RandomForestClassifier,\n",
"    AdaBoostClassifier,\n",
"    MLPClassifier,\n",
"    LogisticRegression,\n",
"    SVC,\n",
"    DecisionTreeClassifier,\n",
"]\n",
"reg = [\n",
"    RandomForestRegressor,\n",
"    ExtraTreesRegressor,\n",
"    AdaBoostRegressor,\n",
"    GaussianProcessRegressor,\n",
"    KernelRidge,\n",
"    MLPRegressor,\n",
"    SVR,\n",
"    DecisionTreeRegressor,\n",
"    LinearRegression,\n",
"    Lasso,\n",
"]\n",
"\n",
"# Attribute names that dir() reports on an instance of every estimator in each group.\n",
"class_things = reduce(set.intersection, (set(dir(estimator())) for estimator in classy))\n",
"reg_things = reduce(set.intersection, (set(dir(estimator())) for estimator in reg))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'__class__',\n",
" '__delattr__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattribute__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__le__',\n",
" '__lt__',\n",
" '__module__',\n",
" '__ne__',\n",
" '__new__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__setattr__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__subclasshook__',\n",
" '__weakref__',\n",
" '_estimator_type',\n",
" '_get_param_names',\n",
" 'fit',\n",
" 'get_params',\n",
" 'predict',\n",
" 'predict_log_proba',\n",
" 'predict_proba',\n",
" 'random_state',\n",
" 'score',\n",
" 'set_params'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_things"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'__class__',\n",
" '__delattr__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattribute__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__le__',\n",
" '__lt__',\n",
" '__module__',\n",
" '__ne__',\n",
" '__new__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__setattr__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__subclasshook__',\n",
" '__weakref__',\n",
" '_estimator_type',\n",
" '_get_param_names',\n",
" 'fit',\n",
" 'get_params',\n",
" 'predict',\n",
" 'score',\n",
" 'set_params'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_things"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'predict_log_proba', 'predict_proba', 'random_state'}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_things - reg_things"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"set()"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_things - class_things"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def class_count(module):\n",
"    \"\"\"Count occurrences of the substring 'class' in an object's docstring.\n",
"\n",
"    The docstring is lower-cased first and the match is substring-based, so\n",
"    words such as 'Classifier' and 'classes' are counted too.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    module : object\n",
"        Any object that may carry a ``__doc__``; this notebook passes\n",
"        estimator classes.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        Number of non-overlapping matches; 0 when there is no docstring.\n",
"    \"\"\"\n",
"    # getattr plus `or ''` tolerates a missing or None docstring, where the\n",
"    # earlier module.__dict__['__doc__'] lookup raised instead.\n",
"    docstring = getattr(module, '__doc__', None) or ''\n",
"    return docstring.lower().count('class')\n",
"\n",
"def reg_count(module):\n",
"    \"\"\"Count occurrences of the substring 'regress' in an object's docstring.\n",
"\n",
"    The docstring is lower-cased first and the match is substring-based, so\n",
"    words such as 'Regression' and 'regressor' are counted too.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    module : object\n",
"        Any object that may carry a ``__doc__``; this notebook passes\n",
"        estimator classes.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        Number of non-overlapping matches; 0 when there is no docstring.\n",
"    \"\"\"\n",
"    # getattr plus `or ''` tolerates a missing or None docstring, where the\n",
"    # earlier module.__dict__['__doc__'] lookup raised instead.\n",
"    docstring = getattr(module, '__doc__', None) or ''\n",
"    return docstring.lower().count('regress')\n",
"\n",
"def is_classifier_doc(module):\n",
"    \"\"\"Rough docstring heuristic for classifier versus regressor.\n",
"\n",
"    Compares class_count(module) with reg_count(module) and returns True\n",
"    (classifier), False (regressor), or the string 'unsure' when neither\n",
"    keyword clearly dominates.\n",
"    \"\"\"\n",
"    n_class, n_regress = class_count(module), reg_count(module)\n",
"\n",
"    # One keyword present while the other is absent settles it outright.\n",
"    if n_class > 0 and n_regress == 0:\n",
"        return True\n",
"    if n_regress > 0 and n_class == 0:\n",
"        return False\n",
"\n",
"    # Otherwise one keyword must lead by at least three mentions.\n",
"    margin = n_class - n_regress\n",
"    if margin >= 3:\n",
"        return True\n",
"    if margin <= -3:\n",
"        return False\n",
"    return 'unsure'\n",
"\n",
"def is_classifier_dir(module):\n",
"    \"\"\"Report whether 'predict_proba' is among the names in dir(module).\n",
"\n",
"    Every classifier examined in this notebook lists that name and no\n",
"    regressor examined does; estimators outside that sample are not checked.\n",
"    \"\"\"\n",
"    attribute_names = dir(module)\n",
"    return 'predict_proba' in attribute_names\n",
"\n",
"def percent_classifier(module):\n",
"    \"\"\"Score how classifier-like the given model looks.\n",
"\n",
"    Averages the verdicts of is_classifier_doc and is_classifier_dir, with\n",
"    True counted as 1, 'unsure' as 0.5 and False as 0. Despite the name, the\n",
"    result is a fraction between 0 and 1 rather than a percentage.\n",
"    \"\"\"\n",
"    doc_verdict = is_classifier_doc(module)\n",
"    dir_verdict = is_classifier_dir(module)\n",
"\n",
"    weights = {True: 1, 'unsure': 0.5, False: 0}\n",
"    total = weights[doc_verdict] + weights[dir_verdict]\n",
"    return total / 2\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\tTrue\t1.0\t24\t0\tExtraTreesClassifier\n",
"True\tTrue\t1.0\t28\t0\tRandomForestClassifier\n",
"True\tTrue\t1.0\t22\t1\tAdaBoostClassifier\n",
"True\tTrue\t1.0\t7\t0\tMLPClassifier\n",
"True\tTrue\t1.0\t25\t5\tLogisticRegression\n",
"True\tTrue\t1.0\t26\t1\tSVC\n",
"True\tTrue\t1.0\t20\t2\tDecisionTreeClassifier\n",
"False\tFalse\t0.0\t1\t9\tRandomForestRegressor\n",
"False\tFalse\t0.0\t1\t5\tExtraTreesRegressor\n",
"False\tFalse\t0.0\t3\t11\tAdaBoostRegressor\n",
"False\tFalse\t0.0\t0\t2\tGaussianProcessRegressor\n",
"False\tFalse\t0.0\t0\t9\tKernelRidge\n",
"unsure\tFalse\t0.25\t2\t2\tMLPRegressor\n",
"False\tFalse\t0.0\t0\t4\tSVR\n",
"False\tFalse\t0.0\t2\t6\tDecisionTreeRegressor\n",
"False\tFalse\t0.0\t1\t4\tLinearRegression\n",
"unsure\tFalse\t0.25\t3\t4\tLasso\n"
]
}
],
"source": [
"for estimator in classy + reg:\n",
"    # Columns: docstring verdict, dir() verdict, combined score,\n",
"    # 'class' count, 'regress' count, estimator name.\n",
"    row = (\n",
"        is_classifier_doc(estimator),\n",
"        is_classifier_dir(estimator),\n",
"        percent_classifier(estimator),\n",
"        class_count(estimator),\n",
"        reg_count(estimator),\n",
"        estimator.__name__,\n",
"    )\n",
"    print(*row, sep='\\t')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@robertmaxwilliams
Copy link
Author

When the models are instantiated, the only difference is that {'predict_log_proba', 'predict_proba', 'random_state'} appear as classifier-only attributes: every classifier tested has them, but not every regressor does.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment