Created
May 1, 2020 15:24
-
-
Save yanniskatsaros/fec0924d5a9378a51a57be37dd864de5 to your computer and use it in GitHub Desktop.
A pure autoregressive model from scratch with Python and numpy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A pure auto-regressive model from scratch. (WIP) | |
-------------------------------------------------- | |
The current implementation is incorrect. A proper | |
auto-regressive model uses $p$ "lags" of the variable | |
from $x_t$ to $x_{t-p}$ each with its own coefficient. | |
An auto-regressive model of order $p$ is written as: | |
$x_t = \sum_{i=1}^{p} \alpha_i x_{t-i} + \epsilon_t | |
See: | |
https://en.wikipedia.org/wiki/Autoregressive_model | |
""" | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# TODO - can just roll-up our own | |
from sklearn.linear_model import LinearRegression | |
class AutoRegressive:
    """An auto-regressive time-series model (work in progress).

    The input series is differenced ``n_diffs`` times, then regressed
    against a copy of itself shifted by ``n_lags`` steps using ordinary
    least squares.

    NOTE(review): as the module docstring states, this is not yet a
    proper AR(p) model -- it regresses on a single shifted series rather
    than on ``p`` separate lagged columns, each with its own coefficient.
    """

    def __init__(self, n_diffs: int, n_lags: int):
        """
        Parameters
        ----------
        n_diffs: int
            Number of first-order differencing passes applied before fitting.
        n_lags: int
            Shift (in steps) used to build the regression target.
        """
        self.n_diffs = n_diffs
        self.n_lags = n_lags
        # flipped to True once `fit` has completed
        self._fit = False

    def _validate_input(self, x: np.ndarray) -> np.ndarray:
        """Coerce `x` to a flat 1-D float array, rejecting true 2-D input.

        Accepts any 1-D `array_like`, or a 2-D one with a singleton row
        or column (shape (n, 1) or (1, n)).

        Raises
        ------
        ValueError
            If `x` is multi-dimensional with more than one row and column.
        """
        # fix: annotation was bare `ndarray` (NameError) -- use np.ndarray
        out = np.asarray(x)
        if out.ndim != 1:
            if out.shape[0] != 1 and out.shape[1] != 1:
                raise ValueError('Input must be a one-dimensional `array_like` object')
        return out.flatten()

    def _difference(self, x: np.ndarray, n: int) -> np.ndarray:
        """Apply first-order differencing to `x`, `n` times.

        Each pass shortens the array by one element; n == 0 returns a copy.
        """
        out = x.copy()
        for _ in range(n):
            out = np.diff(out)
        return out

    def _shift(self, x: np.ndarray, n: int) -> np.ndarray:
        """Shift `x` forward by `n` steps, padding the front with NaN.

        fix: guard n == 0 -- the original `x[:-0]` is an empty slice and
        raised a broadcast error instead of being a no-op.
        """
        if n == 0:
            return x.copy()
        out = np.full(x.shape, np.nan)
        out[n:] = x[:-n]
        return out

    def _reverse_shift(self, x: np.ndarray) -> np.ndarray:
        # inverse of `_shift`, needed by `predict`; not written yet
        raise NotImplementedError

    def fit(self, x: np.ndarray, loss: str = 'L1'):
        """
        Fit an auto-regressive model to the data.

        Parameters
        ----------
        x: numpy.ndarray
            The predictor variable
        loss: str; default = 'L1'
            The loss function to use for the regression.
            Options include: {"L1", "L2", "Huber"}
        """
        # FIXME - implement functionality for loss
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        # shift by the specified lags to produce our target
        _y = self._shift(_x, self.n_lags)
        # clip both and reshape for sklearn
        # NOTE(review): only the first `n_lags` entries of `_y` are NaN
        # after the shift -- clipping `n_diffs + n_lags` discards extra
        # valid rows. Left as-is pending the AR(p) rewrite; confirm.
        _x = _x[self.n_diffs + self.n_lags:].reshape(-1, 1)
        _y = _y[self.n_diffs + self.n_lags:].reshape(-1, 1)
        # apply least squares
        self._lr = LinearRegression()
        self._lr.fit(_x, _y)
        # save the fitted vars
        self._x = _x
        self._y = _y
        self._fit = True

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Forecast from new data (unfinished: needs un-differencing)."""
        # need to transform the input data first
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        raise NotImplementedError

    def plot_diagnostic(self):
        """Scatter the lagged pairs with the fitted regression line.

        Returns the matplotlib Axes. Requires `fit` to have been run.
        """
        if not self._fit:
            raise ValueError('Cannot plot diagnostics. Run `self.fit()` first.')
        fig, ax = plt.subplots(figsize=(8, 5))
        # fix: upper bound was `self._y.max()` -- the line should span
        # the range of the x-axis variable, not mix x-min with y-max
        x_hat = np.linspace(self._x.min(), self._x.max(), num=1000).reshape(-1, 1)
        y_hat = self._lr.predict(x_hat)
        score = self._lr.score(self._x, self._y)
        ax.scatter(self._x, self._y, color='black', alpha=0.25)
        ax.plot(x_hat, y_hat, label='Least Squares')
        ax.set_ylabel(f'Order Difference: {self.n_diffs} - {self.n_lags} Lags')
        ax.set_xlabel(f'Order Difference: {self.n_diffs}')
        ax.set_title(f'$R^2 = {score:.4f}$')
        ax.legend()
        # fix: original called `sns.despine()` but seaborn was never
        # imported (NameError); hide the top/right spines directly
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        return ax
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment