Created
December 4, 2019 17:47
-
-
Save mikewlange/db50b6bb8a39a92897add409438c0fd9 to your computer and use it in GitHub Desktop.
/CreditWorkup/Explaining Blackbox Classifiers.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Set up a classification experiment" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:25:39.435075Z", | |
"start_time": "2019-11-24T04:25:21.385590Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "\n# os.environ[\"MODIN_ENGINE\"] = \"dask\" # Modin will use Dask\nimport os\nos.environ[\"MODIN_ENGINE\"] = \"dask\" # Modin will use Dask\nimport modin.experimental.pandas as pd\nfrom sklearn.datasets import load_boston\nfrom sklearn.model_selection import train_test_split\n\nfrom interpret.provider import InlineProvider\nfrom interpret import set_visualize_provider\nset_visualize_provider(InlineProvider())\ndf = pd.read_csv('train.csv')", | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"ename": "ImportError", | |
"evalue": "The pandas version installed does not match the required pandas version in Modin. Please install pandas 0.25.3 to use Modin.", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-16-e72523c34a73>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menviron\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"MODIN_ENGINE\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"dask\"\u001b[0m \u001b[0;31m# Modin will use Ray\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmodin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mload_boston\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/experimental/pandas/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menviron\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"MODIN_EXPERIMENTAL\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"True\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mmodin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpandas\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;31m# noqa F401, F403\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mio_exp\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mread_sql\u001b[0m \u001b[0;31m# noqa F401\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\"The pandas version installed does not match the required pandas \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\"version in Modin. Please install pandas {} to use \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;34m\"Modin.\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__pandas_version__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mImportError\u001b[0m: The pandas version installed does not match the required pandas version in Modin. Please install pandas 0.25.3 to use Modin." | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-25T14:43:01.727306Z", | |
"start_time": "2019-11-25T14:42:56.116217Z" | |
}, | |
"trusted": false | |
}, | |
"cell_type": "code", | |
"source": "!where pandas", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "/bin/sh: 1: where: not found\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:27:48.273269Z", | |
"start_time": "2019-11-24T04:27:47.782555Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# featuretools for automated feature engineering\nimport featuretools as ft", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:33:21.989460Z", | |
"start_time": "2019-11-24T04:33:21.983150Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df.columns", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'df' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-4-b666bf274d0a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:34:03.584743Z", | |
"start_time": "2019-11-24T04:34:02.535250Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df[df.foreclosure_status == 0]", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "'DataFrame' object has no attribute 'foreclosure_status'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-4-3447e7dab513>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforeclosure_status\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/dataframe.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1900\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1901\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1902\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1903\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1904\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/dataframe.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1896\u001b[0m \"\"\"\n\u001b[1;32m 1897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1900\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'foreclosure_status'" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Let's drop columns that the bank would not be aware of at the time of the loan.", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:35:57.986082Z", | |
"start_time": "2019-11-24T04:35:57.975650Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "quantitative = [f for f in df.columns if df.dtypes[f] != 'object']\nqualitative = [f for f in df.columns if df.dtypes[f] == 'object']", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-11-24T04:36:34.494978Z", | |
"start_time": "2019-11-24T04:36:34.485708Z" | |
}, | |
"trusted": false | |
}, | |
"cell_type": "code", | |
"source": "qualitative", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "['MSZoning',\n 'Street',\n 'Alley',\n 'LotShape',\n 'LandContour',\n 'Utilities',\n 'LotConfig',\n 'LandSlope',\n 'Neighborhood',\n 'Condition1',\n 'Condition2',\n 'BldgType',\n 'HouseStyle',\n 'RoofStyle',\n 'RoofMatl',\n 'Exterior1st',\n 'Exterior2nd',\n 'MasVnrType',\n 'ExterQual',\n 'ExterCond',\n 'Foundation',\n 'BsmtQual',\n 'BsmtCond',\n 'BsmtExposure',\n 'BsmtFinType1',\n 'BsmtFinType2',\n 'Heating',\n 'HeatingQC',\n 'CentralAir',\n 'Electrical',\n 'KitchenQual',\n 'Functional',\n 'FireplaceQu',\n 'GarageType',\n 'GarageFinish',\n 'GarageQual',\n 'GarageCond',\n 'PavedDrive',\n 'PoolQC',\n 'Fence',\n 'MiscFeature',\n 'SaleType',\n 'SaleCondition']" | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"cell_type": "code", | |
"source": "\n\n\ndf = pd.read_csv(\n \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n header=None)\ndf.columns = [\n \"Age\", \"WorkClass\", \"fnlwgt\", \"Education\", \"EducationNum\",\n \"MaritalStatus\", \"Occupation\", \"Relationship\", \"Race\", \"Gender\",\n \"CapitalGain\", \"CapitalLoss\", \"HoursPerWeek\", \"NativeCountry\", \"Income\"\n]\n# df = df.sample(frac=0.01, random_state=1)\ntrain_cols = df.columns[0:-1]\nlabel = df.columns[-1]\nX = df[train_cols]\ny = df[label].apply(lambda x: 0 if x == \" <=50K\" else 1) #Turning response into 0 and 1\n\n# We have to transform categorical variables to use sklearn models\nX_enc = pd.get_dummies(X, prefix_sep='.')\nfeature_names = list(X_enc.columns)\n\nseed = 1 \nX_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.20, random_state=seed)", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "UserWarning: Parameters provided defaulting to pandas implementation.\nTo request implementation, send an email to feature_requests@modin.org.\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Train a blackbox classification system" | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"cell_type": "code", | |
"source": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import Pipeline\n\n#Blackbox system can include preprocessing, not just a classifier!\npca = PCA()\nrf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n\nblackbox_model = Pipeline([('pca', pca), ('rf', rf)])\nblackbox_model.fit(X_train, y_train)", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "Pipeline(memory=None,\n steps=[('pca',\n PCA(copy=True, iterated_power='auto', n_components=None,\n random_state=None, svd_solver='auto', tol=0.0,\n whiten=False)),\n ('rf',\n RandomForestClassifier(bootstrap=True, class_weight=None,\n criterion='gini', max_depth=None,\n max_features='auto',\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n min_samples_leaf=1, min_samples_split=2,\n min_weight_fraction_leaf=0.0,\n n_estimators=100, n_jobs=-1,\n oob_score=False, random_state=None,\n verbose=0, warm_start=False))],\n verbose=False)" | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Show blackbox model performance" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from interpret import show\nfrom interpret.perf import ROC\n\nblackbox_perf = ROC(blackbox_model.predict_proba).explain_perf(X_test, y_test, name='Blackbox')\nshow(blackbox_perf)", | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "Could not unify data of type: <class 'modin.pandas.dataframe.DataFrame'>", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-10-49d410d1936e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0minterpret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mperf\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mROC\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mblackbox_perf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mROC\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblackbox_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexplain_perf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Blackbox'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblackbox_perf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/interpret/perf/curve.py\u001b[0m in \u001b[0;36mexplain_perf\u001b[0;34m(self, X, y, name)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m X, y, self.feature_names, self.feature_types = unify_data(\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m )\n\u001b[1;32m 75\u001b[0m \u001b[0mpredict_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munify_predict_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/interpret/utils/all.py\u001b[0m in \u001b[0;36munify_data\u001b[0;34m(data, labels, feature_names, feature_types)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Could not unify data of type: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \u001b[0mnew_labels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munify_vector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: Could not unify data of type: <class 'modin.pandas.dataframe.DataFrame'>" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Local Explanations: How an individual prediction was made" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from interpret.blackbox import LimeTabular\nfrom interpret import show\n\n#Blackbox explainers need a predict function, and optionally a dataset\nlime = LimeTabular(predict_fn=blackbox_model.predict_proba, data=X_train, random_state=1)\n\n#Pick the instances to explain, optionally pass in labels if you have them\nlime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')\n\nshow(lime_local)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from interpret.blackbox import ShapKernel\nimport numpy as np\n\nbackground_val = np.median(X_train, axis=0).reshape(1, -1)\nshap = ShapKernel(predict_fn=blackbox_model.predict_proba, data=background_val, feature_names=feature_names)\nshap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')\nshow(shap_local)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Global Explanations: How the model behaves overall" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from interpret.blackbox import MorrisSensitivity\n\nsensitivity = MorrisSensitivity(predict_fn=blackbox_model.predict_proba, data=X_train)\nsensitivity_global = sensitivity.explain_global(name=\"Global Sensitivity\")\n\nshow(sensitivity_global)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from interpret.blackbox import PartialDependence\n\npdp = PartialDependence(predict_fn=blackbox_model.predict_proba, data=X_train)\npdp_global = pdp.explain_global(name='Partial Dependence')\n\nshow(pdp_global)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Compare them all in the Dashboard" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "show([blackbox_perf, lime_local, shap_local, sensitivity_global, pdp_global])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.9", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": false, | |
"sideBar": false, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": false, | |
"toc_window_display": false | |
}, | |
"varInspector": { | |
"window_display": false, | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"library": "var_list.py", | |
"delete_cmd_prefix": "del ", | |
"delete_cmd_postfix": "", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"library": "var_list.r", | |
"delete_cmd_prefix": "rm(", | |
"delete_cmd_postfix": ") ", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
] | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "/CreditWorkup/Explaining Blackbox Classifiers.ipynb", | |
"public": true | |
}, | |
"public": true, | |
"description": "/CreditWorkup/Explaining Blackbox Classifiers.ipynb.ipynb", | |
"extension": ".ipynb" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment