A pure autoregressive model from scratch with Python and numpy.
"""
A pure auto-regressive model from scratch. (WIP)
--------------------------------------------------
The current implementation is incorrect. A proper
auto-regressive model uses $p$ "lags" of the variable
from $x_t$ to $x_{t-p}$ each with its own coefficient.
An auto-regressive model of order $p$ is written as:
$x_t = \sum_{i=1}^{p} \alpha_i x_{t-i} + \epsilon_t
See:
https://en.wikipedia.org/wiki/Autoregressive_model
"""
import numpy as np
import matplotlib.pyplot as plt
# needed by `plot_diagnostic` below
import seaborn as sns

# TODO - can just roll our own (see the `_ols` sketch inside the class below)
from sklearn.linear_model import LinearRegression
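

# The module docstring notes that a proper AR(p) model needs a design matrix
# with one column per lag. A minimal sketch of building it (not part of the
# original gist; the `make_lag_matrix` name is hypothetical):
def make_lag_matrix(x: np.ndarray, p: int) -> tuple:
    """
    Return (X, y) where row t of X holds (x_{t-1}, ..., x_{t-p})
    and y[t] = x_t, for t = p, ..., len(x) - 1.
    """
    n = len(x)
    # column i holds the series shifted back by (i + 1) steps
    X = np.column_stack([x[p - 1 - i: n - 1 - i] for i in range(p)])
    y = x[p:]
    return X, y
# e.g. `alpha, *_ = np.linalg.lstsq(X, y, rcond=None)` would then give one
# coefficient per lag, matching the equation in the docstring above.
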
class AutoRegressive:
    def __init__(self, n_diffs: int, n_lags: int):
        self.n_diffs = n_diffs
        self.n_lags = n_lags
        self._fit = False
    def _validate_input(self, x: np.ndarray) -> np.ndarray:
        out = np.array(x)
        if out.ndim != 1:
            if out.shape[0] != 1 and out.shape[1] != 1:
                raise ValueError('Input must be a one-dimensional `array_like` object')
        return out.flatten()
    def _difference(self, x: np.ndarray, n: int) -> np.ndarray:
        # apply first-order differencing `n` times
        out = x.copy()
        for _ in range(n):
            out = np.diff(out)
        return out
    def _shift(self, x: np.ndarray, n: int) -> np.ndarray:
        # shift the series forward by `n` steps, padding the front with NaN
        # (for n = 0, `x[:-0]` would be empty, so return a copy directly)
        if n == 0:
            return x.copy()
        out = np.full(x.shape, np.nan)
        out[n:] = x[:-n]
        return out
    def _reverse_shift(self, x: np.ndarray) -> np.ndarray:
        raise NotImplementedError
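
    # A minimal sketch of the "roll our own" least squares mentioned in the
    # TODO above (not part of the original gist; `_ols` is a hypothetical
    # name). It fits an intercept and slope with `np.linalg.lstsq` instead
    # of sklearn's LinearRegression, assuming 1-D `x` and `y`:
    @staticmethod
    def _ols(x: np.ndarray, y: np.ndarray) -> tuple:
        """Return (intercept, slope) of an ordinary least-squares fit."""
        # design matrix: a column of ones (intercept) next to the predictor
        A = np.column_stack([np.ones_like(x), x])
        (intercept, slope), *_ = np.linalg.lstsq(A, y, rcond=None)
        return intercept, slope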
    def fit(self, x: np.ndarray, loss: str = 'L1'):
        """
        Fit an auto-regressive model to the data.

        Parameters
        ----------
        x: numpy.ndarray
            The predictor variable
        loss: str; default = 'L1'
            The loss function to use for the regression.
            Options include: {"L1", "L2", "Huber"}
        """
        # FIXME - implement functionality for `loss` (currently unused)
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        # shift by the specified lags to produce our target
        _y = self._shift(_x, self.n_lags)
        # clip both (dropping the leading NaNs) and reshape for sklearn
        _x = _x[self.n_diffs + self.n_lags:].reshape(-1, 1)
        _y = _y[self.n_diffs + self.n_lags:].reshape(-1, 1)
        # apply least squares
        self._lr = LinearRegression()
        self._lr.fit(_x, _y)
        # save the fitted vars
        self._x = _x
        self._y = _y
        self._fit = True
    def predict(self, x: np.ndarray) -> np.ndarray:
        # need to transform the input data first
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        raise NotImplementedError
    def plot_diagnostic(self):
        if not self._fit:
            raise ValueError('Cannot plot diagnostics. Run `self.fit()` first.')

        fig, ax = plt.subplots(figsize=(8, 5))
        # evaluate the fitted line across the range of the predictor
        x_hat = np.linspace(self._x.min(), self._x.max(), num=1000).reshape(-1, 1)
        y_hat = self._lr.predict(x_hat)
        score = self._lr.score(self._x, self._y)

        ax.scatter(self._x, self._y, color='black', alpha=0.25)
        ax.plot(x_hat, y_hat, label='Least Squares')
        ax.set_ylabel(f'Order Difference: {self.n_diffs} - {self.n_lags} Lags')
        ax.set_xlabel(f'Order Difference: {self.n_diffs}')
        ax.set_title(f'$R^2 = {score:.4f}$')
        ax.legend()
        sns.despine()
        return ax
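

# A hypothetical usage sketch (not part of the original gist): fit the WIP
# model to a synthetic random walk and draw the diagnostic plot.
if __name__ == '__main__':
    rng = np.random.default_rng(42)
    # a random walk, i.e. the cumulative sum of white noise; one difference
    # should make it roughly stationary
    series = np.cumsum(rng.normal(size=500))

    model = AutoRegressive(n_diffs=1, n_lags=1)
    model.fit(series)
    model.plot_diagnostic()
    plt.show()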