Last active
June 26, 2019 19:02
-
-
Save bradyrx/058ceb84373bffcf61ac2bad4d912b0b to your computer and use it in GitHub Desktop.
Attempt at coding the Diebold-Mariano test for MSE between two forecasts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import t | |
import numpy as np | |
import xarray as xr | |
def autocovariance(Xi, N, k, Xs): | |
autoCov = 0 | |
T = float(N) | |
for i in np.arange(0, N-k): | |
autoCov += ((Xi[i+k])-Xs)*(Xi[i]-Xs) | |
return (1/(T))*autoCov | |
def dm_test(target, fct1, fct2, metric='rmse'): | |
"""Compute the Diebold-Mariano test for MSE between two forecasts. | |
Args: | |
target (xarray object): | |
Actual values that are trying to be forecasted. | |
fct1 (xarray object): | |
Forecasted values for forecast method 1. | |
fct2 (xarray object): | |
Forecasted values for forecast method 2. | |
metric (str): | |
Bias metric applied to forecasts. | |
* rmse | |
* mae | |
* mse | |
Returns: | |
DM_stat (xarray object): | |
DM test statistic. If negative, fct1 is preferred. If positive, fct2 is | |
preferred. | |
pval (xarray object): | |
P values associated with the DM test. | |
References: | |
* Boiroju, Naveen Kumar, et al. "A bootstrap test for equality of mean absolute | |
errors." ARPN Journal of engineering and applied sciences 6.5 (2011): 9-11. | |
* Gneiting, Tilmann, and Matthias Katzfuss. "Probabilistic forecasting." Annual | |
Review of Statistics and Its Application 1 (2014): 125-151. | |
* Harvey, David, Stephen Leybourne, and Paul Newbold. "Testing the equality of | |
prediction mean squared errors." International Journal of forecasting 13.2 | |
(1997): 281-291. | |
* Diebold, F. X. and Mariano, R. S. (1995), Comparing predictive accuracy, | |
Journal of business & economic statistics 13(3), 253-264. | |
* https://github.com/johntwk/Diebold-Mariano-Test/blob/master/dm_test.py | |
""" | |
# drop coordinates since they'll be out of sync | |
fct1 = fct1.drop('time') | |
fct2 = fct2.drop('time') | |
target = target.drop('time') | |
# length of forecast (does it make sense that T here | |
# is many forecasts? Or are they looking for each entry | |
# to be a lead time?) | |
T = float(len(target)) | |
# MSE for now | |
if metric == 'mse': | |
e1 = (target - fct1)**2 | |
e2 = (target - fct2)**2 | |
elif metric == 'rmse': | |
e1 = np.sqrt((target - fct1)**2) | |
e2 = np.sqrt((target - fct2)**2) | |
elif metric == 'mae': | |
e1 = np.absolute(target - fct1) | |
e2 = np.absolute(target - fct2) | |
# difference in two different errors | |
d = e1 - e2 | |
# Mean of individual errors | |
mean_d = d.mean() | |
# Covariance | |
gamma = autocovariance(d, len(d), 0, mean_d) | |
V_d = gamma/T | |
# Calculate DM-Statistic | |
DM_stat = V_d**(-0.5)*mean_d | |
p_value = 2*t.cdf(-abs(DM_stat), df=T-1) | |
return xr.DataArray(DM_stat), xr.DataArray(p_value) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment