Skip to content

Instantly share code, notes, and snippets.

View edunuke's full-sized avatar

Eduardo Perez Denadai edunuke

  • dataquantix
  • Panamá
View GitHub Profile
# NOTE(review): gist scrape — indentation was stripped and this snippet is
# truncated mid-expression: the `try:` has no matching except/finally and the
# keras.optimizers.Adam(lr=0.001, call on the last line is never closed.
# Recover the original notebook before treating this as runnable code.
def init(params):
try:
# Hyperparameters arrive as a dict; "regularizer" may be None per the
# search space defined below.
regularizer = params["regularizer"]
# Unit-norm constraint on embedding weight columns (L2 norm == 1 per column).
constraint = keras.constraints.UnitNorm(axis=0)
# Embedding dimension = 4th root of the category cardinality, rounded —
# a common rule of thumb for categorical embeddings.
risk_embed_dim =int(np.round(len(set(risk_df.credit_score)) ** (1/4)))
products_embed_dim =int(np.round(len(set(products_df.product_code)) ** (1/4)))
user_embed_dim =int(np.round(len(set(users_df.client_id)) ** (1/4)))
# user branch
# Input width is one less than user_shape's last dim — presumably the
# dropped `client_id` column; verify against the data prep code.
user_input = keras.layers.Input(shape=(user_shape[-1]-1,), name='user_input')
# Hyperparameter search space
# NOTE(review): this re-binds the `params` argument with a hyperopt search
# space — looks like a separate snippet spliced in by the scrape; each
# hp.choice below offers a single candidate, so the "search" is degenerate.
params= {
'nodes1': hp.choice('n1',[100,]),
'nodes2': hp.choice('n2',[200]),
'nodes3': hp.choice('n3',[300]),
'nodes4': hp.choice('n4', [200]),
'nodes5': hp.choice('n5', [100]),
'batch_size': hp.choice('bs', [1000]),
'regularizer': hp.choice('reg', [None,]),
'optimizer': hp.choice('opt', [keras.optimizers.Adam(lr=0.001,
# Set up the input data so it can be read by the neural network: one numpy
# array per branch (user / product / risk), in the order the model expects.
# `client_id` is only a join key, so it is dropped from every frame.
# NOTE(review): the original mixed drop('client_id', axis=1) and
# drop(['client_id'], axis=1); unified to one style — behavior is identical.
input_train = [user_train.drop(['client_id'], axis=1).values,
               product_train.drop(['client_id'], axis=1).values,
               risk_train.drop(['client_id'], axis=1).values]
input_val = [user_val.drop(['client_id'], axis=1).values,
             product_val.drop(['client_id'], axis=1).values,
             risk_val.drop(['client_id'], axis=1).values]
# Split the dataset 80/20 into train/validation. Passing all three feature
# frames plus the one-hot target through a single train_test_split call keeps
# their rows aligned; the fixed SEED makes the split reproducible.
traintest_split = model_selection.train_test_split(
    products_df.drop(TARGET_NAME, axis=1),
    risk_df,
    users_df,
    y_class,
    test_size=0.2,
    random_state=SEED,
)
# Unpack the per-frame train/validation pieces (returned in input order).
(product_train, product_val,
 risk_train, risk_val,
 user_train, user_val,
 y_class_train, y_class_val) = traintest_split
#target variable vector
# One-hot encode the target column: one column per distinct product code,
# one row per sample (used as the multi-class target for the network).
y_class = pd.get_dummies(products_df[TARGET_NAME])
# print(y_class[0].values)
#[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# load datasets
# Each CSV's first column is used as the DataFrame index (index_col=0);
# presumably the frames share a client_id key — verify against the data prep.
users_df = pd.read_csv("dataset/users_data.csv", index_col=0)
risk_df = pd.read_csv("dataset/risk_data.csv", index_col=0)
products_df = pd.read_csv("dataset/products_data.csv", index_col=0)
# Static Constants
SEED = 123  # fixed RNG seed for reproducible splits
TARGET_NAME = "product_code"  # column of products_df used as the prediction target
RESAMPLED= False  # toggle for a resampling step defined elsewhere in the notebook
import os
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import keras
import keras.backend as K
from sklearn.utils import class_weight
from sklearn import dummy, metrics, model_selection, datasets, preprocessing
from sklearn.manifold import TSNE
from lime import submodular_pick
# Set up SP-LIME (submodular pick) to choose a small, diverse set of
# representative explanations. Runtime grows quickly with sample_size, so
# only 20 perturbation samples are drawn over the first 500 training rows;
# 9 features per explanation, 5 explanations returned.
sp_obj = submodular_pick.SubmodularPick(explainer,
X_train.values[:500],
predict,
sample_size=20,
num_features=9,
num_exps_desired=5)
import lime
import lime.lime_tabular
from sklearn import linear_model
# set up the LIME explainer
# Tabular explainer in regression mode; continuous features are kept raw
# (discretize_continuous=False) rather than binned.
# NOTE(review): X_train is passed directly here but X_train.values is used
# elsewhere — LimeTabularExplainer expects a numpy array; confirm X_train's type.
explainer = lime.lime_tabular.LimeTabularExplainer(X_train,
training_labels = Y_train.values,
feature_names = X_train.columns,
mode = 'regression',
discretize_continuous = False)
# get the overall mean contribution of each feature variable
# SHAP summary plot over the first 500 training rows; shap_values[0] is the
# values for the first model output — presumably the first class; confirm
# against how shap_values was produced.
shap.summary_plot(shap_values[0], X_train.values[:500], feature_names=X_train.columns)