Skip to content

Instantly share code, notes, and snippets.

@dewith
Last active May 26, 2021 01:48
Show Gist options
  • Save dewith/6869438a600591a9be5ddc3977ab369c to your computer and use it in GitHub Desktop.
Save dewith/6869438a600591a9be5ddc3977ab369c to your computer and use it in GitHub Desktop.
Python function to create multiple plots to evaluate the model performance visually.
def evaluate_model(y_train, y_train_pred, y_test, y_test_pred):
    """Create a three-panel figure to evaluate a regression model visually.

    The figure contains a histogram of the train/test errors (top row), a
    residuals-vs-predicted scatter plot (bottom left) and a
    predicted-vs-real scatter plot for the test set with an identity line
    and a least-squares best-fit line (bottom right).

    Parameters
    ----------
    y_train : array-like
        The target for the train set.
    y_train_pred : array-like
        The predicted target for the train set.
    y_test : array-like
        The target for the test set.
    y_test_pred : array-like
        The predicted target for the test set.

    Returns
    -------
    matplotlib.gridspec.GridSpec
        The grid specification the three axes were laid out on. The figure
        itself is left as the current pyplot figure.

    Raises
    ------
    ValueError
        If a target array and its prediction array differ in length.

    Notes
    -----
    Requirements: NumPy, Matplotlib, Seaborn.
    All axes use a "thousands" tick formatter (e.g. 25000 -> " 25K"), which
    is meant for large-valued targets such as money.
    """
    # Imported locally so the function is self-contained, like the
    # matplotlib helpers it already imported at function scope.
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from matplotlib.ticker import FuncFormatter

    # Flatten everything to 1-D so that a column vector of shape (n, 1)
    # minus a vector of shape (n,) cannot silently broadcast to (n, n).
    y_train = np.asarray(y_train).ravel()
    y_train_pred = np.asarray(y_train_pred).ravel()
    y_test = np.asarray(y_test).ravel()
    y_test_pred = np.asarray(y_test_pred).ravel()
    if y_train.shape != y_train_pred.shape:
        raise ValueError('y_train and y_train_pred must have the same length')
    if y_test.shape != y_test_pred.shape:
        raise ValueError('y_test and y_test_pred must have the same length')

    # This is optional, it's made for large values (i.e., money)
    def kilos(x, pos):
        """Format tick value `x` in thousands; `pos` (tick position) is unused."""
        return '%3.0fK' % (x * 1e-3)

    formatter = FuncFormatter(kilos)
    # End of optional

    fig = plt.figure(constrained_layout=False, figsize=(12, 9))
    widths = [5, 5]
    heights = [3, 5]
    gs = fig.add_gridspec(ncols=2, nrows=2, wspace=0.4, hspace=0.35,
                          width_ratios=widths,
                          height_ratios=heights)

    # Histogram of Errors
    ax1 = fig.add_subplot(gs[0, :])
    ax1.set_title('Histogram of Errors')
    train_errors = y_train - y_train_pred
    test_errors = y_test - y_test_pred
    # Share one range so both histograms use identical bin edges.
    error_range = (min(np.min(train_errors), np.min(test_errors)),
                   max(np.max(train_errors), np.max(test_errors)))
    # Plain count histograms; replaces the deprecated `sns.distplot`
    # (called with kde=False), which forwarded to `Axes.hist` anyway.
    ax1.hist(train_errors, bins=45, range=error_range, alpha=1,
             label='Train')
    ax1.hist(test_errors, bins=45, range=error_range, alpha=1,
             label='Test')
    ax1.set_ylabel('Quantity')
    ax1.set_xlabel('Error')
    ax1.legend(fancybox=True, loc='right')
    ax1.xaxis.set_major_formatter(formatter)

    # Residuals plot
    # NOTE: residuals here are (predicted - real), i.e. the opposite sign
    # of the errors shown in the histogram above (real - predicted).
    ax2 = fig.add_subplot(gs[1, 0])
    ax2.set_title('Residuals Plot')
    ax2.scatter(y_train_pred, y_train_pred - y_train,
                s=25, edgecolor='white', linewidths=0.5, alpha=1,
                label='Train')
    ax2.scatter(y_test_pred, y_test_pred - y_test,
                s=25, edgecolor='white', linewidths=0.5, alpha=1,
                label='Test')
    # Zero-residual reference line spanning the current x-limits.
    ax2.hlines(y=0,
               xmin=np.min(ax2.get_xlim()),
               xmax=np.max(ax2.get_xlim()),
               color='black', alpha=0.9,
               lw=2)
    ax2.set_xlabel('Predicted values')
    ax2.set_ylabel('Residuals')
    ax2.legend(fancybox=True, loc='lower right')
    ax2.xaxis.set_major_formatter(formatter)
    ax2.yaxis.set_major_formatter(formatter)

    # Prediction errors plot
    ax3 = fig.add_subplot(gs[1, 1])
    ax3.set_title('Prediction Errors Plot')
    # The original passed the bare name `red`, which is undefined inside
    # this function; use the named matplotlib color instead.
    ax3.scatter(y_test, y_test_pred, color='red',
                s=25, edgecolor='white', linewidths=0.5, alpha=1)
    # Identity line over the combined extent of both axes.
    lims = [np.min([ax3.get_xlim(), ax3.get_ylim()]),
            np.max([ax3.get_xlim(), ax3.get_ylim()])]
    ax3.plot(lims, lims, '-k', alpha=1, lw=2, label='Identity')
    # x/y are passed by keyword (required by current seaborn) and the
    # target axes is explicit rather than relying on the current axes.
    sns.regplot(x=y_test, y=y_test_pred, scatter=False, ci=None,
                color='black', label='Best fit',
                line_kws={'ls': '--', 'alpha': 0.6, 'lw': 2},
                ax=ax3)
    ax3.set_xlabel('Real (test)')
    ax3.set_ylabel('Predicted (test)')
    ax3.legend(fancybox=True, loc='lower right')
    ax3.xaxis.set_major_formatter(formatter)
    ax3.yaxis.set_major_formatter(formatter)

    return gs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment