This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the frame into the feature matrix X and the target vector Y.
X = df.drop('price', axis=1)
Y = df.price
# Mark the ordinal diamond attributes as pandas categoricals so each
# level gets a stable integer code we can feed to the model.
X[['cut', 'color', 'clarity']] = X[['cut', 'color', 'clarity']].astype('category')
# Build (category label, integer code) lookup tables for decoding later.
# BUG FIX (two issues in the original):
#  1. color_cat_map zipped the color categories against the *cut* codes
#     (copy-paste error).
#  2. `cat.codes.unique()` returns codes in order of first appearance,
#     which is NOT aligned with `cat.categories` (sorted); the correct
#     code for categories[i] is simply i, so enumerate the categories.
cut_cat_map = [(cat, code) for code, cat in enumerate(X.cut.cat.categories)]
color_cat_map = [(cat, code) for code, cat in enumerate(X.color.cat.categories)]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hold out a test split (33%) for model evaluation; fixed seed for reproducibility.
# BUG FIX: `import sklearn` alone does not expose the `model_selection`
# submodule, so the next line would raise NameError — import it explicitly.
from sklearn import model_selection

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.33, random_state=5)
print('Y_train: ', Y_train.shape)
print('Y_test: ', Y_test.shape)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Network front-end: a named input layer whose width matches the number of
# feature columns, followed by batch normalization of the raw inputs.
from keras.callbacks import EarlyStopping
from keras.layers import BatchNormalization, Dense, Input
from keras.models import Model

inputs = Input(name='input', shape=(X_train.shape[1],))
x = BatchNormalization(name='input_bn')(inputs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DeepSHAP attribution for the trained keras model.
import shap

# Load the javascript helpers that shap's notebook plots require.
shap.initjs()
# DeepExplainer implements DeepSHAP; the training data serves as the
# background distribution for the expected values. (Fitting on a subset
# is faster — using all of it works but gets slow.)
explainer_shap = shap.DeepExplainer(model=model, data=X_train)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspect some individual explanations inferred by DeepSHAP.
# Force plot of the first record's attributions (no feature values shown).
shap.force_plot(explainer_shap.expected_value,
                shap_values[0][0],
                feature_names=X_train.columns)
# BUG FIX: the original paired the SHAP values of record 1
# (shap_values[0][0][1]) with the feature values of record 0
# (X_train.values[:500][0]). The record index must be the same on both
# arguments or the plot annotates the wrong feature values.
shap.force_plot(explainer_shap.expected_value,
                shap_values[0][0][1],
                X_train.values[:500][1],
                feature_names=X_train.columns)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reconstruct one prediction from its SHAP decomposition:
#   prediction = base value (expected model output over the background data)
#                + sum of that record's SHAP values.
record = 1  # pick one record in the dataset
# BUG FIX: `explainer_2` is not defined anywhere in this file; the explainer
# built above is `explainer_shap`. (NOTE(review): if an `explainer_2` exists
# in an unseen cell, confirm which one is intended.)
base_value = explainer_shap.expected_value
output = base_value + np.sum(shap_values[0][0][record])
print('base value: ', base_value)
print('output value: ', output)
# sanity check that the output value equals the actual prediction
print(np.round(output, decimals=1) ==
      np.round(model.predict(X_train.values)[record], decimals=1))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Global view: mean contribution of every feature variable across records.
shap.summary_plot(shap_values[0],
                  features=X_train.values[:500],
                  feature_names=X_train.columns)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set up the LIME tabular explainer in regression mode.
import lime
import lime.lime_tabular
from sklearn import linear_model

# BUG FIX: LimeTabularExplainer expects a numpy 2-d array as training data,
# not a DataFrame — pass the underlying array, consistent with the
# `.values` usage everywhere else in this notebook.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    training_labels=Y_train.values,
    feature_names=X_train.columns,
    mode='regression',
    # keep continuous features as-is rather than binning them
    discretize_continuous=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SP-LIME: pick a small, diverse set of representative explanations
# instead of explaining every record individually.
from lime import submodular_pick

# sample_size=20 records are explained; runtime grows quickly with more.
# NOTE(review): `predict` is assumed to be the model's prediction wrapper
# defined in an unseen cell — confirm it returns a 1-d array for regression.
sp_obj = submodular_pick.SubmodularPick(explainer,
                                        X_train.values[:500],
                                        predict,
                                        sample_size=20,
                                        num_features=9,
                                        num_exps_desired=5)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| import matplotlib.pyplot as plt | |
| import keras | |
| import keras.backend as K | |
| from sklearn.utils import class_weight | |
| from sklearn import dummy, metrics, model_selection, datasets, preprocessing | |
| from sklearn.manifold import TSNE |