Skip to content

Instantly share code, notes, and snippets.

@jiahao87
Last active March 13, 2022 08:14
Show Gist options
  • Save jiahao87/0a3883d2ac5cbe12249e6d73505918df to your computer and use it in GitHub Desktop.
Save jiahao87/0a3883d2ac5cbe12249e6d73505918df to your computer and use it in GitHub Desktop.
Hierarchical Forecasting Reconciliation using OLS Method
import numpy as np
import pandas as pd
import hts # To install: pip install scikit-hts
import collections
from scipy.optimize import lsq_linear
# Toy base forecasts for a single period, grouped by hierarchy level:
# the grand total, the three states, and the ten individual stores.
total_forecast = {'total': 14}
state_forecasts = {'CA': 5.4, 'TX': 1.8, 'WI': 5.9}
store_forecasts = {
    'CA_1': 0.8, 'CA_2': 0.6, 'CA_3': 0.9, 'CA_4': 0.3,
    'TX_1': 0.03, 'TX_2': 0.5, 'TX_3': 0.5,
    'WI_1': 1.6, 'WI_2': 1.2, 'WI_3': 1.5,
}

# One row (one period) with one column per node, ordered top level first.
base_forecasts = {**total_forecast, **state_forecasts, **store_forecasts}
hts_df = pd.DataFrame([base_forecasts])
print(hts_df)
# Node names for the state level and the store level of the hierarchy.
states = ['CA', 'TX', 'WI']
stores = ['CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3', 'WI_1', 'WI_2', 'WI_3']
# build the hierarchy tree as a dictionary mapping each parent to its children
total = {'total': list(states)}
# Match on the state code *plus* the '_' separator so that a code which is a
# prefix of another code (e.g. 'W' vs 'WI') cannot claim the other's stores.
state_h = {k: [v for v in stores if v.startswith(k + '_')] for k in states}
# 'total' comes first, followed by one entry per state.
hierarchy = {**total, **state_h}
# Turn the parent -> children mapping into an hts hierarchy tree backed by the
# base-forecast frame.
tree = hts.hierarchy.HierarchyTree.from_nodes(
    df=hts_df,
    nodes=hierarchy,
)
# Derive the summing matrix and its node labels from the tree; the labels are
# used below to key the forecasts and to name the reconciled output columns.
sum_mat, sum_mat_labels = hts.functions.to_sum_mat(tree)
# Wrap each node's base forecast in a single-column 'yhat' frame, inserted in
# the order given by sum_mat_labels.
pred_dict = collections.OrderedDict()
for label in sum_mat_labels:
    # The loop body must be indented; without it the script fails to parse.
    pred_dict[label] = pd.DataFrame(data=hts_df[label].values, columns=['yhat'])

# perform forecast reconciliation
# NOTE(review): mse is passed as an empty dict; presumably it is not consulted
# for method='OLS' -- confirm against the scikit-hts documentation.
revised = hts.functions.optimal_combination(pred_dict, sum_mat, method='OLS', mse={})
# Label the reconciled values: one column per node, same index as the input.
revised_forecasts = pd.DataFrame(
    data=revised,
    index=hts_df.index,
    columns=sum_mat_labels,
)
print(revised_forecasts)
######################################################
####### For non-negative reconciled forecasts #######
######################################################
# Collect the base forecasts held in pred_dict into a single matrix.
hat_mat = hts.functions.y_hat_matrix(pred_dict)
# Arrange the base forecasts as one row per row of hts_df. With a single row
# this is exactly the vector the previous flatten() produced; with several
# rows it assumes hat_mat is laid out as (n_rows, n_nodes) -- TODO confirm.
base_rows = np.asarray(hat_mat).reshape(len(hts_df.index), -1)
# For every forecast row, solve the least-squares problem with the solution
# vector (one entry per column of sum_mat) bounded to [0, inf), then map that
# solution through sum_mat to obtain a value for every node.
revised_nnls = np.vstack([
    np.dot(sum_mat, lsq_linear(sum_mat, base_row, bounds=(0, np.inf))['x'])
    for base_row in base_rows
])
# Label the result: one column per node, same index as the input frame.
revised_forecasts_nnls = pd.DataFrame(
    data=revised_nnls,
    index=hts_df.index,
    columns=sum_mat_labels,
)
print(revised_forecasts_nnls)
# Note: In this toy example, the non-negative reconciled forecasts are the same
# as the ordinary reconciled forecasts because the unconstrained least-squares
# solution already satisfies the non-negativity bounds.
#
# To see an example where non-negative least squares prevents negative
# forecasts, try the following figures instead:
# hts_df = pd.DataFrame([{'total': 2,
# 'CA': 1.4, 'TX': 1.8, 'WI': 1.9,
# 'CA_1': 0.8, 'CA_2': 0.6, 'CA_3': 0.9, 'CA_4': 0.3,
# 'TX_1': 0.03, 'TX_2': 0.5, 'TX_3': 0.5,
# 'WI_1': 1.6, 'WI_2': 1.2, 'WI_3': 1.5
# }])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment