Function to plot the model evaluation with test data
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.offline as py
from sklearn.metrics import (confusion_matrix, classification_report,
                             accuracy_score, f1_score, roc_auc_score,
                             roc_curve, precision_recall_curve)


def prediction_evaluation(algorithm, X_train, X_test, y_train, y_test,
                          predictor_cols, cf='features'):
    """
    Function to fit, predict, and evaluate the provided algorithm, using the
    Plotly library to visualize the confusion matrix, model metrics, ROC curve,
    precision-recall curve, and feature importances.
    @Args:
      algorithm: the model algorithm object
      X_train: the predictor features of the training pandas data frame
      X_test: the predictor features of the testing pandas data frame
      y_train: the target variable of the training pandas data frame
      y_test: the target variable of the testing pandas data frame
      predictor_cols: the list of predictor column names, used to label
                      the feature-importance plot
      cf: toggles how the feature information is extracted from the model;
          the input only accepts 2 possible values.
          LOV - 'coefficients': specifically for logistic regression
              - 'features': specifically for tree-based models
    Return:
      predictions and probabilities
    """
    if cf not in ['features', 'coefficients']:
        # Exception case - return None
        print("ERROR: Mode toggle (cf parameter) is not in LOV. Please recheck.")
        return None, None

    algorithm.fit(X_train, y_train)
    y_pred = algorithm.predict(X_test)
    y_prob = algorithm.predict_proba(X_test)
    algorithm_name = str(algorithm).split('(', 1)[0]
    # Extract coefficients (linear models) or feature importances (tree-based
    # models) and pair them with the predictor column names
    if cf == 'coefficients':
        coeff = pd.DataFrame(algorithm.coef_.ravel())
    elif cf == 'features':
        coeff = pd.DataFrame(algorithm.feature_importances_)
    col_df = pd.DataFrame(predictor_cols)
    coef_smry = pd.merge(coeff, col_df, left_index=True, right_index=True, how='left')
    coef_smry.columns = ['coefficients', 'features']
    coef_smry = coef_smry.sort_values(by='coefficients', ascending=False)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Compute metrics from the confusion matrix
    tp = conf_matrix[1, 1]
    fn = conf_matrix[1, 0]
    fp = conf_matrix[0, 1]
    tn = conf_matrix[0, 0]
    accuracy_ = (tp + tn) / (tp + tn + fp + fn)
    precision_ = tp / (tp + fp)
    recall_ = tp / (tp + fn)
    f1_score_ = f1_score(y_test, y_pred)
    # Use the positive-class probabilities (not the hard labels) for ROC AUC
    model_roc_auc = roc_auc_score(y_test, y_prob[:, 1])
    # Print report
    print(algorithm)
    print("\nClassification report: \n", classification_report(y_test, y_pred))
    print("\nAccuracy Score: ", np.round(accuracy_score(y_test, y_pred), 4))
    print("F1 Score: ", np.round(f1_score_, 4))
    print("Area Under Curve: ", np.round(model_roc_auc, 4), "\n")
    # Trace 1: plot confusion matrix
    trace1 = go.Heatmap(z=conf_matrix,
                        x=['Not Leave', 'Leave'],
                        y=['Not Leave', 'Leave'],
                        showscale=False,
                        colorscale='Picnic',
                        name="Confusion Matrix"
                        )
    # Trace 2: plot model metrics
    show_metrics = pd.DataFrame(data=[[accuracy_, precision_, recall_, f1_score_]])
    show_metrics = show_metrics.T
    colors = ['gold', 'lightgreen', 'lightcoral', 'lightskyblue']
    trace2 = go.Bar(x=show_metrics[0].values,
                    y=['Accuracy', 'Precision', 'Recall', 'F1 score'],
                    text=np.round(show_metrics[0].values, 4),
                    name='',
                    textposition='auto',
                    orientation='h',
                    opacity=0.8,
                    marker=dict(color=colors,
                                line=dict(color='#000000',
                                          width=1.5)
                                )
                    )
    # Traces 3 & 4: plot ROC curve and the chance (diagonal) reference line
    fpr, tpr, thresholds = roc_curve(y_test, y_prob[:, 1])
    trace3 = go.Scatter(x=fpr,
                        y=tpr,
                        name="ROC: " + str(np.round(model_roc_auc, 4)),
                        line=dict(color='rgb(22, 96, 197)',
                                  width=2)
                        )
    trace4 = go.Scatter(x=[0, 1],
                        y=[0, 1],
                        line=dict(color='rgb(205, 12, 24)',
                                  width=1.5,
                                  dash='dot')
                        )
    # Trace 5: plot precision-recall curve
    __precision, __recall, t = precision_recall_curve(y_test, y_prob[:, 1])
    trace5 = go.Scatter(x=__recall,
                        y=__precision,
                        name="Precision-Recall",
                        line=dict(color='lightcoral',
                                  width=2),
                        fill='tozeroy'
                        )
    # Trace 6: plot coefficients / feature importances
    trace6 = go.Bar(x=coef_smry['features'],
                    y=coef_smry['coefficients'],
                    name="coefficients",
                    marker=dict(color=coef_smry['coefficients'],
                                colorscale='Picnic',
                                line=dict(width=.6, color='black')
                                )
                    )
    # Assemble the subplots: a 2x2 grid plus a full-width bottom row
    fig = tls.make_subplots(rows=3, cols=2,
                            specs=[[{}, {}],
                                   [{}, {}],
                                   [{'colspan': 2}, None]],
                            subplot_titles=('Confusion Matrix',
                                            'Metrics',
                                            'Receiver Operating Characteristic (ROC)',
                                            'Precision - Recall Curve',
                                            'Feature Importances'
                                            )
                            )
    fig.append_trace(trace1, 1, 1)
    fig.append_trace(trace2, 1, 2)
    fig.append_trace(trace3, 2, 1)
    fig.append_trace(trace4, 2, 1)
    fig.append_trace(trace5, 2, 2)
    fig.append_trace(trace6, 3, 1)
    fig['layout'].update(showlegend=False,
                         title="Model Performance of {}".format(algorithm_name),
                         autosize=False,
                         height=1000,
                         width=800,
                         plot_bgcolor='rgba(240, 240, 240, 0.95)',
                         paper_bgcolor='rgba(240, 240, 240, 0.95)',
                         margin=dict(b=195)
                         )
    fig['layout']['xaxis1'].update(dict(title="Prediction"))
    fig['layout']['yaxis1'].update(dict(title="Actual"))
    fig['layout']['xaxis2'].update(dict(range=[0, 1]))
    fig['layout']['xaxis3'].update(dict(title="False Positive Rate"))
    fig['layout']['yaxis3'].update(dict(title="True Positive Rate"))
    fig['layout']['xaxis4'].update(dict(title="Recall", range=[0, 1.05]))
    fig['layout']['yaxis4'].update(dict(title="Precision", range=[0, 1.05]))
    fig['layout']['xaxis5'].update(dict(showgrid=True,
                                        tickfont=dict(size=10),
                                        tickangle=90
                                        )
                                   )
    fig.layout.titlefont.size = 14
    py.iplot(fig)
    return y_pred, y_prob
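
A minimal usage sketch, for illustration only: the data frame df, its target column 'target', and the 80/20 split are assumptions, not part of the original gist. Any scikit-learn classifier exposing predict_proba and feature_importances_ (or coef_ with cf='coefficients') should work the same way.

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Hypothetical data: df is a pandas DataFrame with a binary 'target' column
predictor_cols = [c for c in df.columns if c != 'target']
X_train, X_test, y_train, y_test = train_test_split(
    df[predictor_cols], df['target'], test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred, y_prob = prediction_evaluation(rf, X_train, X_test, y_train, y_test,
                                       predictor_cols, cf='features')

For a logistic regression model, the same call with cf='coefficients' would plot the model's coefficients in the bottom panel instead of feature importances.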