Skip to content

Instantly share code, notes, and snippets.

@fclesio
Created March 22, 2020 14:28
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fclesio/1fc8ae544b55059a1eee9557901daee3 to your computer and use it in GitHub Desktop.
Save fclesio/1fc8ae544b55059a1eee9557901daee3 to your computer and use it in GitHub Desktop.
Sensitivity difference between RMSE and RMSLE
import pandas as pd
import math
import numpy as np
# Start from an empty frame with a target column (y) and a prediction column (y_hat)
df_preds = pd.DataFrame(columns=['y', 'y_hat'])
# Append the baseline (y, y_hat) pairs one row at a time, in order
baseline_pairs = [
    [1, 1],
    [2, 3],
    [50, 55],
    [500, 502],
    [1000000, 1000005],
]
for pair in baseline_pairs:
    df_preds.loc[len(df_preds)] = pair
# Inspect the frame (a bare expression is echoed in an interactive session)
df_preds
# Expected contents:
#          y    y_hat
# 0        1        1
# 1        2        3
# 2       50       55
# 3      500      502
# 4  1000000  1000005
# Create functions
def rmse(predictions, targets):
    '''Return the root mean squared error between predictions and targets.

    Source: https://stackoverflow.com/questions/17197492/is-there-a-library-function-for-root-mean-square-error-rmse-in-python

    Args:
        predictions: Predicted values; a NumPy array or any array-like
            (list, tuple) that NumPy can subtract element-wise.
        targets: Observed values, broadcast-compatible with ``predictions``.

    Returns:
        The square root of the mean of the squared element-wise differences.
    '''
    # np.subtract accepts plain sequences as well as arrays, whereas the
    # ``-`` operator raises TypeError on two Python lists.
    errors = np.subtract(predictions, targets)
    return np.sqrt((errors ** 2).mean())
def rmsle(predict, target):
    '''Return the root mean squared logarithmic error (RMSLE).

    Source: https://towardsdatascience.com/metrics-and-python-850b60710e0c

    Computes sqrt(sum((log(1 + predict) - log(1 + target)) ** 2) / n),
    where n is ``len(predict)``.

    NOTE: the previous implementation called ``np.log1p(x + 1)``, which
    applies the +1 offset twice and therefore evaluated log(x + 2). Outputs
    recorded with that version differ slightly from the values returned here.

    Args:
        predict: Array-like of predicted values.
        target: Array-like of observed values, same length as ``predict``.

    Returns:
        The RMSLE. Pairs whose log-transformed prediction or target is NaN
        add nothing to the sum of squares, but the divisor is still
        ``len(predict)`` (behaviour kept from the original implementation).

    Raises:
        ZeroDivisionError: If ``predict`` is empty.
    '''
    # Cast to float so np.log1p also works on object-dtype arrays
    # (e.g. values pulled out of an object-typed DataFrame column).
    predicted = np.asarray(predict, dtype=float)
    observed = np.asarray(target, dtype=float)
    # log1p(x) already evaluates log(1 + x); no additional "+ 1" is needed.
    log_pred = np.log1p(predicted)
    log_targ = np.log1p(observed)
    # Keep only the pairs where both logs are defined.
    valid = ~(np.isnan(log_pred) | np.isnan(log_targ))
    total = float(np.sum((log_pred[valid] - log_targ[valid]) ** 2))
    # Plain Python division so that empty input still raises ZeroDivisionError.
    return np.sqrt(total / len(predict))
# Re-inspect the input frame before computing the metrics
df_preds
# Expected contents:
#          y    y_hat
# 0        1        1
# 1        2        3
# 2       50       55
# 3      500      502
# 4  1000000  1000005
# Report both error metrics for the baseline predictions
print(
    'RMSE: '
    + str(rmse(df_preds['y_hat'].values, df_preds['y'].values))
)
print(
    'RMSLE: '
    + str(rmsle(df_preds['y_hat'].values, df_preds['y'].values))
)
# Output recorded by the author:
# RMSE: 3.3166247903554
# RMSLE: 0.1079235658917167
# Second scenario: enlarge the absolute error on the largest-magnitude row
# (from 5 to 500) while leaving every other row untouched
# Rebuild the frame from scratch
df_preds = pd.DataFrame(columns=['y', 'y_hat'])
# Append the (y, y_hat) pairs one row at a time, in order
scenario_pairs = [
    [1, 1],
    [2, 3],
    [50, 55],
    [500, 502],
    [1000000, 1000500],
]
for pair in scenario_pairs:
    df_preds.loc[len(df_preds)] = pair
# Inspect the frame
df_preds
# Expected contents:
#          y    y_hat
# 0        1        1
# 1        2        3
# 2       50       55
# 3      500      502
# 4  1000000  1000500
# The RMSE explodes, while the RMSLE stays almost the same: it compares values
# on a log scale, so it barely penalizes this error on a large-magnitude value
print(
    'RMSE: '
    + str(rmse(df_preds['y_hat'].values, df_preds['y'].values))
)
print(
    'RMSLE: '
    + str(rmsle(df_preds['y_hat'].values, df_preds['y'].values))
)
# Output recorded by the author:
# RMSE: 223.6202137553759
# RMSLE: 0.10792379739703087
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment