Skip to content

Instantly share code, notes, and snippets.

@jesserobertson
Last active February 11, 2019 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jesserobertson/244a03677ecbb984c6bfa379c96c021d to your computer and use it in GitHub Desktop.
Save jesserobertson/244a03677ecbb984c6bfa379c96c021d to your computer and use it in GitHub Desktop.
MASE scoring metric for Unearthed's Glencore Turn Up The Zinc Competition (https://unearthed.solutions/u/competitions/turn-zinc)
""" file: mase.py
author: Jess Robertson, jess@unearthed.solutions
date: Thursday, 31 January 2019
description: MASE scoring for Glencore competition (https://unearthed.solutions/u/competitions/turn-zinc)
"""
import numpy as np
def _mase_numeric_only(predicted, measured):
    """
    Calculate the mean absolute scaled error (MASE) between predicted
    and measured timeseries.

    The absolute error at every sample is divided by the mean absolute
    difference between consecutive measurements (the error of a one-step
    naive forecast), and the scaled errors are then averaged.

    Does no checking of input variables, see `mase` for nan handling and
    minimum sample lengths

    >>> a = np.arange(4)
    >>> _mase_numeric_only(a, a)
    0.0
    >>> _mase_numeric_only(a, a[::-1])
    2.0

    Any NaN in the measurements makes the scaling factor NaN, so the
    whole score is NaN

    >>> b = np.asarray([np.nan, 2, 3, np.nan])
    >>> _mase_numeric_only(a, b)
    nan
    >>> c = np.asarray([np.nan, 2, 3, np.inf])
    >>> _mase_numeric_only(a, c)
    nan

    Parameters:
        predicted, measured - the timeseries to compare (arrays or plain
            sequences of numbers; both are converted to float arrays).
            Non-finite values in `predicted` are replaced using
            `np.nan_to_num` (NaN becomes 0) before the comparison.

    Returns:
        the MASE score for the forecast. The score is non-finite when the
        measurements contain NaN, or when consecutive measurements never
        change (the scaling factor is then zero).
    """
    # Work in floating point: integer arrays (unsigned ones in particular)
    # wrap around when differenced, and plain sequences do not support
    # elementwise arithmetic at all.
    predicted = np.asarray(predicted, dtype=float)
    measured = np.asarray(measured, dtype=float)

    # Scaling factor: mean absolute change between consecutive measurements
    naive_forecast_error = np.abs(measured[1:] - measured[:-1]).mean()

    forecast_error = \
        np.abs(measured - np.nan_to_num(predicted)) / naive_forecast_error
    return np.nanmean(forecast_error)
def mase(predicted, measured, min_samples=3):
    """
    Calculate the mean absolute scaled error (MASE) between a predicted
    and measured timeseries

    See https://www.sciencedirect.com/science/article/pii/S0169207015000448
    for why you should use MASE for comparing forecasts

    >>> mase([1, 2, 3, 4], [1, 2, 3, 4])
    0.0
    >>> mase([1, 2, 3, 4], [4, 3, 2, 1])
    2.0

    When every measurement is finite, at least min_samples measurements
    are required

    >>> mase([1, 2], [4, 3])
    Traceback (most recent call last):
    ...
    ValueError: Need at least 3 samples to calculate MASE

    When the measurements contain NaN or infinite values, the series is
    split into runs of consecutive finite measurements. Only runs that are
    strictly longer than min_samples are scored, and the scores of those
    runs are averaged (every scored run counts equally, whatever its
    length)

    >>> mase([1, 2, 3, 4], [np.nan, 2, 3, np.nan])
    Traceback (most recent call last):
    ...
    ValueError: Couldn't find any non-NaN segments longer than 3 in measurements
    >>> mase([1, 2, 3, 4, 0, 5, 6, 7, 8], [1, 2, 3, 4, np.nan, 5, 6, 7, 8])
    0.0

    Infinite measurements split the series in the same way as NaN

    >>> mase([0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 2, 3, 4, np.inf, 5, 6, 7, 8])
    4.5

    Shorter periods are ignored - for example the end section where there is
    a large error is not counted because its run of three finite samples is
    not longer than the default min_samples of 3

    >>> mase([1, 2, 3, 4, 0, 0, 0, 0], [1, 2, 3, 4, np.nan, 5, 6, 7])
    0.0

    Setting `min_samples=2` lets MASE take the shorter bit into account

    >>> mase([1, 2, 3, 4, 0, 0, 0, 0], [1, 2, 3, 4, np.nan, 5, 6, 7], 2)
    3.0

    Min samples must be at least 2

    >>> mase([1, 2, 3], [4, 5, 6], min_samples=1)
    Traceback (most recent call last):
    ...
    ValueError: mase.min_samples must be at least 2

    Parameters:
        predicted, measured - the timeseries to compare
        min_samples - sample-count threshold, defaults to 3, must be >= 2.
            A fully finite measured series needs at least this many
            samples; a run of finite samples inside a series with gaps
            needs strictly more than this many to be scored.

    Returns:
        the MASE score for the forecast

    Raises:
        ValueError - if min_samples < 2, if a fully finite measured series
            has fewer than min_samples samples, or if no run of finite
            measurements is longer than min_samples
    """
    if min_samples < 2:
        raise ValueError('mase.min_samples must be at least 2')

    # Make sure we have float numpy arrays, so integer input cannot wrap
    # around and NaN/inf are representable
    predicted = np.asarray(predicted, dtype=float)
    measured = np.asarray(measured, dtype=float)

    # No gaps: score the whole series in one go
    if np.isfinite(measured).all():
        if len(measured) < min_samples:
            raise ValueError(
                'Need at least {} samples to calculate MASE'.format(min_samples))
        return _mase_numeric_only(predicted, measured)

    # Gaps present: masked_invalid masks NaN *and* +/-inf, so the check
    # above must use isfinite (not isnan) to send inf-only series down this
    # path too. Score each sufficiently long run of finite measurements.
    segments = [
        _mase_numeric_only(predicted[_slice], measured[_slice])
        for _slice in np.ma.clump_unmasked(np.ma.masked_invalid(measured))
        if _slice.stop - _slice.start > min_samples
    ]
    if not segments:
        raise ValueError("Couldn't find any non-NaN segments longer than "
                         "{} in measurements".format(min_samples))
    return np.mean(segments)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment