Skip to content

Instantly share code, notes, and snippets.

@mikewlange
Created December 4, 2019 17:47
Show Gist options
  • Save mikewlange/db50b6bb8a39a92897add409438c0fd9 to your computer and use it in GitHub Desktop.
Save mikewlange/db50b6bb8a39a92897add409438c0fd9 to your computer and use it in GitHub Desktop.
/CreditWorkup/Explaining Blackbox Classifiers.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## Setup a classification experiment"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:25:39.435075Z",
"start_time": "2019-11-24T04:25:21.385590Z"
},
"trusted": true
},
"cell_type": "code",
"source": "\n# os.environ[\"MODIN_ENGINE\"] = \"dask\" # Modin will use Dask\nimport os\nos.environ[\"MODIN_ENGINE\"] = \"dask\" # Modin will use Ray\nimport modin.experimental.pandas as pd\nfrom sklearn.datasets import load_boston\nfrom sklearn.model_selection import train_test_split\n\nfrom interpret.provider import InlineProvider\nfrom interpret import set_visualize_provider\nset_visualize_provider(InlineProvider())\ndf = pd.read_csv('train.csv')",
"execution_count": 16,
"outputs": [
{
"ename": "ImportError",
"evalue": "The pandas version installed does not match the required pandas version in Modin. Please install pandas 0.25.3 to use Modin.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-16-e72523c34a73>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menviron\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"MODIN_ENGINE\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"dask\"\u001b[0m \u001b[0;31m# Modin will use Ray\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmodin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mload_boston\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/experimental/pandas/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menviron\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"MODIN_EXPERIMENTAL\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"True\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mmodin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpandas\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;31m# noqa F401, F403\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mio_exp\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mread_sql\u001b[0m \u001b[0;31m# noqa F401\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\"The pandas version installed does not match the required pandas \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\"version in Modin. Please install pandas {} to use \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;34m\"Modin.\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__pandas_version__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: The pandas version installed does not match the required pandas version in Modin. Please install pandas 0.25.3 to use Modin."
]
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-25T14:43:01.727306Z",
"start_time": "2019-11-25T14:42:56.116217Z"
},
"trusted": false
},
"cell_type": "code",
"source": "!where pandas",
"execution_count": 15,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "/bin/sh: 1: where: not found\n"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:27:48.273269Z",
"start_time": "2019-11-24T04:27:47.782555Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# featuretools for automated feature engineering\nimport featuretools as ft",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:33:21.989460Z",
"start_time": "2019-11-24T04:33:21.983150Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df.columns",
"execution_count": 4,
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-b666bf274d0a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
]
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:34:03.584743Z",
"start_time": "2019-11-24T04:34:02.535250Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df[df.foreclosure_status == 0]",
"execution_count": 4,
"outputs": [
{
"ename": "AttributeError",
"evalue": "'DataFrame' object has no attribute 'foreclosure_status'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-3447e7dab513>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforeclosure_status\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/dataframe.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1900\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1901\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1902\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1903\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1904\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/modin/pandas/dataframe.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1896\u001b[0m \"\"\"\n\u001b[1;32m 1897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1900\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'foreclosure_status'"
]
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Let's drop colums that the bank would not be aware of at time of lean. ",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:35:57.986082Z",
"start_time": "2019-11-24T04:35:57.975650Z"
},
"trusted": true
},
"cell_type": "code",
"source": "quantitative = [f for f in df.columns if df.dtypes[f] != 'object']\nqualitative = [f for f in df.columns if df.dtypes[f] == 'object']",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-11-24T04:36:34.494978Z",
"start_time": "2019-11-24T04:36:34.485708Z"
},
"trusted": false
},
"cell_type": "code",
"source": "qualitative",
"execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "['MSZoning',\n 'Street',\n 'Alley',\n 'LotShape',\n 'LandContour',\n 'Utilities',\n 'LotConfig',\n 'LandSlope',\n 'Neighborhood',\n 'Condition1',\n 'Condition2',\n 'BldgType',\n 'HouseStyle',\n 'RoofStyle',\n 'RoofMatl',\n 'Exterior1st',\n 'Exterior2nd',\n 'MasVnrType',\n 'ExterQual',\n 'ExterCond',\n 'Foundation',\n 'BsmtQual',\n 'BsmtCond',\n 'BsmtExposure',\n 'BsmtFinType1',\n 'BsmtFinType2',\n 'Heating',\n 'HeatingQC',\n 'CentralAir',\n 'Electrical',\n 'KitchenQual',\n 'Functional',\n 'FireplaceQu',\n 'GarageType',\n 'GarageFinish',\n 'GarageQual',\n 'GarageCond',\n 'PavedDrive',\n 'PoolQC',\n 'Fence',\n 'MiscFeature',\n 'SaleType',\n 'SaleCondition']"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "\n\n\ndf = pd.read_csv(\n \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n header=None)\ndf.columns = [\n \"Age\", \"WorkClass\", \"fnlwgt\", \"Education\", \"EducationNum\",\n \"MaritalStatus\", \"Occupation\", \"Relationship\", \"Race\", \"Gender\",\n \"CapitalGain\", \"CapitalLoss\", \"HoursPerWeek\", \"NativeCountry\", \"Income\"\n]\n# df = df.sample(frac=0.01, random_state=1)\ntrain_cols = df.columns[0:-1]\nlabel = df.columns[-1]\nX = df[train_cols]\ny = df[label].apply(lambda x: 0 if x == \" <=50K\" else 1) #Turning response into 0 and 1\n\n# We have to transform categorical variables to use sklearn models\nX_enc = pd.get_dummies(X, prefix_sep='.')\nfeature_names = list(X_enc.columns)\n\nseed = 1 \nX_train, X_test, y_train, y_test = train_test_split(X_enc, y, test_size=0.20, random_state=seed)",
"execution_count": 8,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": "UserWarning: Parameters provided defaulting to pandas implementation.\nTo request implementation, send an email to feature_requests@modin.org.\n"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Train a blackbox classification system"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.decomposition import PCA\nfrom sklearn.pipeline import Pipeline\n\n#Blackbox system can include preprocessing, not just a classifier!\npca = PCA()\nrf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n\nblackbox_model = Pipeline([('pca', pca), ('rf', rf)])\nblackbox_model.fit(X_train, y_train)",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": "Pipeline(memory=None,\n steps=[('pca',\n PCA(copy=True, iterated_power='auto', n_components=None,\n random_state=None, svd_solver='auto', tol=0.0,\n whiten=False)),\n ('rf',\n RandomForestClassifier(bootstrap=True, class_weight=None,\n criterion='gini', max_depth=None,\n max_features='auto',\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n min_samples_leaf=1, min_samples_split=2,\n min_weight_fraction_leaf=0.0,\n n_estimators=100, n_jobs=-1,\n oob_score=False, random_state=None,\n verbose=0, warm_start=False))],\n verbose=False)"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Show blackbox model performance"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from interpret import show\nfrom interpret.perf import ROC\n\nblackbox_perf = ROC(blackbox_model.predict_proba).explain_perf(X_test, y_test, name='Blackbox')\nshow(blackbox_perf)",
"execution_count": 10,
"outputs": [
{
"ename": "ValueError",
"evalue": "Could not unify data of type: <class 'modin.pandas.dataframe.DataFrame'>",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-49d410d1936e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0minterpret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mperf\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mROC\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mblackbox_perf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mROC\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblackbox_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexplain_perf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Blackbox'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblackbox_perf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/interpret/perf/curve.py\u001b[0m in \u001b[0;36mexplain_perf\u001b[0;34m(self, X, y, name)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m X, y, self.feature_names, self.feature_types = unify_data(\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_types\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m )\n\u001b[1;32m 75\u001b[0m \u001b[0mpredict_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munify_predict_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/interpret/utils/all.py\u001b[0m in \u001b[0;36munify_data\u001b[0;34m(data, labels, feature_names, feature_types)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Could not unify data of type: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0mlog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 296\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \u001b[0mnew_labels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munify_vector\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Could not unify data of type: <class 'modin.pandas.dataframe.DataFrame'>"
]
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Local Explanations: How an individual prediction was made"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from interpret.blackbox import LimeTabular\nfrom interpret import show\n\n#Blackbox explainers need a predict function, and optionally a dataset\nlime = LimeTabular(predict_fn=blackbox_model.predict_proba, data=X_train, random_state=1)\n\n#Pick the instances to explain, optionally pass in labels if you have them\nlime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')\n\nshow(lime_local)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from interpret.blackbox import ShapKernel\nimport numpy as np\n\nbackground_val = np.median(X_train, axis=0).reshape(1, -1)\nshap = ShapKernel(predict_fn=blackbox_model.predict_proba, data=background_val, feature_names=feature_names)\nshap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')\nshow(shap_local)",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Global Explanations: How the model behaves overall"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from interpret.blackbox import MorrisSensitivity\n\nsensitivity = MorrisSensitivity(predict_fn=blackbox_model.predict_proba, data=X_train)\nsensitivity_global = sensitivity.explain_global(name=\"Global Sensitivity\")\n\nshow(sensitivity_global)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from interpret.blackbox import PartialDependence\n\npdp = PartialDependence(predict_fn=blackbox_model.predict_proba, data=X_train)\npdp_global = pdp.explain_global(name='Partial Dependence')\n\nshow(pdp_global)",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Compare them all in the Dashboard"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "show([blackbox_perf, lime_local, shap_local, sensitivity_global, pdp_global])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.9",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": false,
"sideBar": false,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": false,
"toc_window_display": false
},
"varInspector": {
"window_display": false,
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"library": "var_list.py",
"delete_cmd_prefix": "del ",
"delete_cmd_postfix": "",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"library": "var_list.r",
"delete_cmd_prefix": "rm(",
"delete_cmd_postfix": ") ",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
]
},
"gist": {
"id": "",
"data": {
"description": "/CreditWorkup/Explaining Blackbox Classifiers.ipynb",
"public": true
},
"public": true,
"description": "/CreditWorkup/Explaining Blackbox Classifiers.ipynb.ipynb",
"extension": ".ipynb"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment