@netsatsawat
Created May 19, 2019 17:29
Function to plot the model evaluation with test data
import numpy as np
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
from sklearn.metrics import (confusion_matrix, classification_report, accuracy_score,
                             f1_score, roc_auc_score, roc_curve, precision_recall_curve)


def prediction_evaluation(algorithm, X_train, X_test, y_train, y_test,
                          predictor_cols, cf='features'):
    """
    Fit the provided algorithm, predict on the test set, and evaluate it using the
    Plotly library to visualize the confusion matrix, key metrics, ROC curve,
    precision-recall curve and the coefficient / feature importances.
    @Args:
        algorithm: the model algorithm object
        X_train: the predictor features of the training pandas data frame
        X_test: the predictor features of the testing pandas data frame
        y_train: the target variable of the training pandas data frame
        y_test: the target variable of the testing pandas data frame
        predictor_cols: the names of the predictor columns, used to label the
                        coefficient / feature importance plot
        cf: toggles how the information is extracted from the model;
            only two values are accepted (LOV):
            - 'coefficients': specifically for logistic regression
            - 'features': specifically for tree-based models
    Return:
        predictions and predicted probabilities for X_test
    """
    if cf not in ['features', 'coefficients']:
        # Exception case - return None
        print("ERROR: Mode toggle (cf parameter) is not in the LOV. Please recheck.")
        return None, None

    algorithm.fit(X_train, y_train)
    y_pred = algorithm.predict(X_test)
    y_prob = algorithm.predict_proba(X_test)
    algorithm_name = str(algorithm).split('(', 1)[0]

    # collect coefficients (linear model) or feature importances (tree-based model)
    if cf == 'coefficients':
        coeff = pd.DataFrame(algorithm.coef_.ravel())
    elif cf == 'features':
        coeff = pd.DataFrame(algorithm.feature_importances_)
    col_df = pd.DataFrame(predictor_cols)
    coef_smry = pd.merge(coeff, col_df, left_index=True, right_index=True, how='left')
    coef_smry.columns = ['coefficients', 'features']
    coef_smry = coef_smry.sort_values(by='coefficients', ascending=False)
    # compute metrics from the confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    tp = conf_matrix[1, 1]
    fn = conf_matrix[1, 0]
    fp = conf_matrix[0, 1]
    tn = conf_matrix[0, 0]
    accuracy_ = (tp + tn) / (tp + tn + fp + fn)
    precision_ = tp / (tp + fp)
    recall_ = tp / (tp + fn)
    f1_score_ = f1_score(y_test, y_pred)
    model_roc_auc = roc_auc_score(y_test, y_pred)

    # Print report
    print(algorithm)
    print("\nClassification report: \n", classification_report(y_test, y_pred))
    print("\nAccuracy Score: ", np.round(accuracy_score(y_test, y_pred), 4))
    print("F1 Score: ", np.round(f1_score_, 4))
    print("Area Under Curve: ", np.round(model_roc_auc, 4), "\n")
    # Trace 1: plot confusion matrix
    trace1 = go.Heatmap(z=conf_matrix,
                        x=['Not Leave', 'Leave'],
                        y=['Not Leave', 'Leave'],
                        showscale=False,
                        colorscale='Picnic',
                        name="Confusion Matrix")

    # Trace 2: plot model metrics
    show_metrics = pd.DataFrame(data=[[accuracy_, precision_, recall_, f1_score_]])
    show_metrics = show_metrics.T
    colors = ['gold', 'lightgreen', 'lightcoral', 'lightskyblue']
    trace2 = go.Bar(x=show_metrics[0].values,
                    y=['Accuracy', 'Precision', 'Recall', 'F1 score'],
                    text=np.round(show_metrics[0].values, 4),
                    name='',
                    textposition='auto',
                    orientation='h',
                    opacity=0.8,
                    marker=dict(color=colors,
                                line=dict(color='#000000', width=1.5)))
    # Trace 3: plot ROC curve (trace 4 is the random-classifier diagonal)
    fpr, tpr, thresholds = roc_curve(y_test, y_prob[:, 1])
    trace3 = go.Scatter(x=fpr,
                        y=tpr,
                        name="ROC: " + str(model_roc_auc),
                        line=dict(color='rgb(22, 96, 197)', width=2))
    trace4 = go.Scatter(x=[0, 1],
                        y=[0, 1],
                        line=dict(color='rgb(205, 12, 24)', width=1.5, dash='dot'))

    # Trace 5: plot precision-recall curve
    __precision, __recall, t = precision_recall_curve(y_test, y_prob[:, 1])
    trace5 = go.Scatter(x=__recall,
                        y=__precision,
                        name="Precision - Recall curve",
                        line=dict(color='lightcoral', width=2),
                        fill='tozeroy')

    # Trace 6: plot coefficients / feature importances
    trace6 = go.Bar(x=coef_smry['features'],
                    y=coef_smry['coefficients'],
                    name="coefficients",
                    marker=dict(color=coef_smry['coefficients'],
                                colorscale='Picnic',
                                line=dict(width=.6, color='black')))
    # subplots
    fig = tls.make_subplots(rows=3, cols=2,
                            specs=[[{}, {}],
                                   [{}, {}],
                                   [{'colspan': 2}, None]],
                            subplot_titles=('Confusion Matrix',
                                            'Metrics',
                                            'Receiver Operating Characteristic (ROC)',
                                            'Precision - Recall curve',
                                            'Feature Importances'))
    fig.append_trace(trace1, 1, 1)
    fig.append_trace(trace2, 1, 2)
    fig.append_trace(trace3, 2, 1)
    fig.append_trace(trace4, 2, 1)
    fig.append_trace(trace5, 2, 2)
    fig.append_trace(trace6, 3, 1)

    fig['layout'].update(showlegend=False,
                         title="Model Performance of {}".format(algorithm_name),
                         autosize=False,
                         height=1000,
                         width=800,
                         plot_bgcolor='rgba(240, 240, 240, 0.95)',
                         paper_bgcolor='rgba(240, 240, 240, 0.95)',
                         margin=dict(b=195))
    fig['layout']['xaxis1'].update(dict(title="Prediction"))
    fig['layout']['yaxis1'].update(dict(title="Actual"))
    fig['layout']['xaxis2'].update(dict(range=[0, 1]))
    fig['layout']['xaxis3'].update(dict(title="False Positive Rate"))
    fig['layout']['yaxis3'].update(dict(title="True Positive Rate"))
    fig['layout']['xaxis4'].update(dict(title="Recall"), range=[0, 1.05])
    fig['layout']['yaxis4'].update(dict(title="Precision"), range=[0, 1.05])
    fig['layout']['xaxis5'].update(dict(showgrid=True,
                                        tickfont=dict(size=10),
                                        tickangle=90))
    fig.layout.titlefont.size = 14

    py.iplot(fig)
    return y_pred, y_prob
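
A minimal usage sketch follows. The synthetic dataset, column names, and model settings below are illustrative assumptions only and are not part of the original gist; any binary classifier exposing feature_importances_ (with cf='features') or coef_ (with cf='coefficients') should work.

# Example usage (illustrative sketch): a synthetic binary-classification dataset
# stands in for the real data the gist was written for.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000, n_features=8, random_state=42)
predictor_cols = ['feat_{}'.format(i) for i in range(8)]
X = pd.DataFrame(X, columns=predictor_cols)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Tree-based model: cf='features' reads algorithm.feature_importances_
rf_pred, rf_prob = prediction_evaluation(RandomForestClassifier(n_estimators=100, random_state=42),
                                         X_train, X_test, y_train, y_test,
                                         predictor_cols, cf='features')

# Logistic regression: cf='coefficients' reads algorithm.coef_
lr_pred, lr_prob = prediction_evaluation(LogisticRegression(solver='liblinear'),
                                         X_train, X_test, y_train, y_test,
                                         predictor_cols, cf='coefficients')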