Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sensitivity difference between RMSE and RMSLE
import pandas as pd
import math
import numpy as np
# Start from an empty table holding true values (y) and predictions (y_hat)
df_preds = pd.DataFrame(columns=['y', 'y_hat'])
# Append the sample rows one by one, each at the next free integer label
for _y, _y_hat in [(1, 1), (2, 3), (50, 55), (500, 502), (1000000, 1000005)]:
    df_preds.loc[len(df_preds)] = [_y, _y_hat]
# Display the table (bare expression, only shown in an interactive session)
df_preds
# y y_hat
# 0 1 1
# 1 2 3
# 2 50 55
# 3 500 502
# 4 1000000 1000005
# Create functions
def rmse(predictions, targets):
    """Return the root mean squared error between predictions and targets.

    Both arguments may be any array-like (NumPy array, list, tuple, ...).
    They are converted to float arrays and compared element by element,
    by position.

    Args:
        predictions: Array-like of predicted values.
        targets: Array-like of true values, broadcast-compatible with
            ``predictions``.

    Returns:
        sqrt(mean((predictions - targets) ** 2)) as a NumPy float.

    Source: https://stackoverflow.com/questions/17197492/is-there-a-library-function-for-root-mean-square-error-rmse-in-python
    """
    # Converting to float lets plain sequences through and prevents
    # unsigned-integer wrap-around when a prediction is below its target.
    errors = np.asarray(predictions, dtype=float) - np.asarray(targets, dtype=float)
    return np.sqrt(np.mean(errors ** 2))
def rmsle(predict, target):
    """Return the root mean squared logarithmic error (RMSLE).

    Computes sqrt(sum((log(1 + predict) - log(1 + target)) ** 2) / n),
    where n is ``len(predict)``.

    Pairs whose squared log difference is NaN (for example when a value is
    NaN or below -1) are left out of the sum, but they still count in the
    denominator n.

    Args:
        predict: Array-like of predicted values.
        target: Array-like of true values, same length as ``predict``.

    Returns:
        The RMSLE as a NumPy float.

    Raises:
        ZeroDivisionError: If ``predict`` is empty.

    Source: https://towardsdatascience.com/metrics-and-python-850b60710e0c
    """
    # Convert up front so lists and object-dtype arrays work with the ufunc.
    # log1p(x) is already log(1 + x); adding another 1 to x would yield
    # log(2 + x), which is not the RMSLE definition.
    log_pred = np.log1p(np.asarray(predict, dtype=float))
    log_targ = np.log1p(np.asarray(target, dtype=float))
    squared = (log_pred - log_targ) ** 2
    # nansum skips NaN pairs; a Python float keeps the empty-input division
    # an explicit ZeroDivisionError instead of a silent NaN.
    total = float(np.nansum(squared))
    return np.sqrt(total / len(predict))
# Check data before execution
df_preds
# y y_hat
# 0 1 1
# 1 2 3
# 2 50 55
# 3 500 502
# 4 1000000 1000005
# Get stats: evaluate each metric on the same prediction/target arrays
for _label, _metric in (('RMSE: ', rmse), ('RMSLE: ', rmsle)):
    print(_label + str(_metric(df_preds['y_hat'].values, df_preds['y'].values)))
# RMSE: 3.3166247903554
# RMSLE: 0.1079235658917167
# Increase the error on the largest-magnitude value (from 5 to 500)
# Rebuild the dataframe from scratch
df_preds = pd.DataFrame(columns=['y', 'y_hat'])
# Append the rows one by one; only the last prediction differs from before
for _y, _y_hat in [(1, 1), (2, 3), (50, 55), (500, 502), (1000000, 1000500)]:
    df_preds.loc[len(df_preds)] = [_y, _y_hat]
# Check
df_preds
# y y_hat
# 0 1 1
# 1 2 3
# 2 50 55
# 3 500 502
# 4 1000000 1000500
# The RMSE exploded, but the RMSLE barely changed because it does not
# penalize the error on predictions of bigger magnitude
for _label, _metric in (('RMSE: ', rmse), ('RMSLE: ', rmsle)):
    print(_label + str(_metric(df_preds['y_hat'].values, df_preds['y'].values)))
# RMSE: 223.6202137553759
# RMSLE: 0.10792379739703087
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.