Skip to content

Instantly share code, notes, and snippets.

@cnmoro
Created December 2, 2021 19:49
Show Gist options
  • Save cnmoro/11eea791ed019d46968e11532d42dafa to your computer and use it in GitHub Desktop.
Save cnmoro/11eea791ed019d46968e11532d42dafa to your computer and use it in GitHub Desktop.
shap_feature_importance.py
import shap
import numpy as np
import pandas as pd
# Names of the categorical columns as they were called before one-hot
# encoding; used to recognise the dummy columns derived from them.
# NOTE(review): the values below look like placeholders - replace them with
# the real column names of the dataset.
categoric_features = ('FEATURE1', 'FEATURE2', 'ETC')
def _aggregate_abs_shap(shap_values, n_features):
    """Collapse raw SHAP values into one non-negative weight per feature.

    The weight of a feature is the absolute SHAP value averaged over the
    model outputs and then summed over the samples.
    """
    abs_values = np.abs(np.asarray(shap_values))

    if abs_values.ndim == 2:
        # Single-output model: (n_samples, n_features). Treat it as a model
        # with exactly one output so the same formula applies.
        abs_values = abs_values[np.newaxis]
    elif abs_values.ndim == 3:
        if not isinstance(shap_values, (list, tuple)):
            # A list holds one (n_samples, n_features) matrix per output and
            # is already "outputs first" after stacking. A plain 3-D array is
            # assumed to be (n_samples, n_features, n_outputs), the layout of
            # recent shap releases - TODO confirm against the installed
            # version.
            abs_values = np.moveaxis(abs_values, -1, 0)
    else:
        raise ValueError(
            'Unsupported SHAP values shape: {}'.format(abs_values.shape)
        )

    if abs_values.shape[-1] != n_features:
        # Without this check zip() would silently pair weights with the
        # wrong columns or drop some of them.
        raise ValueError(
            'SHAP values describe {} features but X has {} columns'.format(
                abs_values.shape[-1], n_features
            )
        )

    return abs_values.mean(axis=0).sum(axis=0)


def _group_one_hot_importances(columns, weights, categorical_names, sep='_'):
    """Add up the weights of dummy columns under their original feature."""
    # Longest name first, so that 'A_B' wins over 'A' for a column 'A_B_x'.
    prefixes = sorted(
        ((name, name + sep) for name in categorical_names if name),
        key=lambda pair: len(pair[1]),
        reverse=True,
    )

    totals = {}
    for column, weight in zip(columns, weights):
        key = column
        if isinstance(column, str):
            for base_name, prefix in prefixes:
                # Requiring the separator keeps unrelated columns that merely
                # share leading characters (e.g. 'FEATURE10') on their own.
                if column.startswith(prefix):
                    key = base_name
                    break
        totals[key] = totals.get(key, 0.0) + float(weight)
    return totals


def avaliar_importancias_features(modelo_treinado, X, features_categoricas=None):
    """Rank the features of a trained model by SHAP importance.

    Columns produced by one-hot encoding a categorical feature are folded
    back into a single entry. ``pd.get_dummies`` names them
    ``<feature>_<category>``, so every column that starts with
    ``<feature>_`` contributes to the importance of ``<feature>``.

    Args:
        modelo_treinado: Fitted model, passed to ``shap.Explainer``.
        X: DataFrame with the (already encoded) columns the model was
            trained on; its ``columns`` attribute supplies the names.
        features_categoricas: Names of the categorical features before
            encoding. Defaults to the module-level ``categoric_features``.

    Returns:
        dict mapping feature name to weight, most important first. A weight
        is the absolute SHAP value averaged over the model outputs and
        summed over the rows of ``X`` (and over the dummy columns of a
        categorical feature).

    Raises:
        ValueError: If the SHAP values do not have one entry per column of
            ``X`` or have an unsupported number of dimensions.
    """
    if features_categoricas is None:
        features_categoricas = categoric_features
    if isinstance(features_categoricas, str):
        # A lone name must not be iterated character by character.
        features_categoricas = (features_categoricas,)

    explainer = shap.Explainer(modelo_treinado)
    shap_values = explainer.shap_values(X)

    columns = list(X.columns)
    weights = _aggregate_abs_shap(shap_values, len(columns))
    totals = _group_one_hot_importances(columns, weights, features_categoricas)

    # Most important features first.
    return dict(sorted(totals.items(), key=lambda item: item[1], reverse=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment