Created
May 1, 2020 15:24
-
-
Save yanniskatsaros/fec0924d5a9378a51a57be37dd864de5 to your computer and use it in GitHub Desktop.
A pure autoregressive model from scratch with Python and numpy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A pure auto-regressive model from scratch. (WIP) | |
-------------------------------------------------- | |
The current implementation is incorrect. A proper | |
auto-regressive model uses $p$ "lags" of the variable | |
from $x_t$ to $x_{t-p}$ each with its own coefficient. | |
An auto-regressive model of order $p$ is written as: | |
$x_t = \sum_{i=1}^{p} \alpha_i x_{t-i} + \epsilon_t | |
See: | |
https://en.wikipedia.org/wiki/Autoregressive_model | |
""" | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# TODO - can just roll-up our own | |
from sklearn.linear_model import LinearRegression | |
class AutoRegressive:
    """An auto-regressive time-series model (work in progress).

    The input series is differenced ``n_diffs`` times, then regressed
    against a copy of itself shifted by ``n_lags`` steps using ordinary
    least squares.

    NOTE(review): as the module docstring states, this is not yet a
    proper AR(p) model -- it regresses on a single shifted series rather
    than on ``p`` separate lagged columns, each with its own coefficient.
    """

    def __init__(self, n_diffs: int, n_lags: int):
        """
        Parameters
        ----------
        n_diffs: int
            Number of first-order differencing passes applied before fitting.
        n_lags: int
            Shift (in steps) used to build the regression target.
        """
        self.n_diffs = n_diffs
        self.n_lags = n_lags
        # flipped to True once `fit` has completed
        self._fit = False

    def _validate_input(self, x: np.ndarray) -> np.ndarray:
        """Coerce `x` to a flat 1-D float array, rejecting true 2-D input.

        Accepts any 1-D `array_like`, or a 2-D one with a singleton row
        or column (shape (n, 1) or (1, n)).

        Raises
        ------
        ValueError
            If `x` is multi-dimensional with more than one row and column.
        """
        # fix: annotation was bare `ndarray` (NameError) -- use np.ndarray
        out = np.asarray(x)
        if out.ndim != 1:
            if out.shape[0] != 1 and out.shape[1] != 1:
                raise ValueError('Input must be a one-dimensional `array_like` object')
        return out.flatten()

    def _difference(self, x: np.ndarray, n: int) -> np.ndarray:
        """Apply first-order differencing to `x`, `n` times.

        Each pass shortens the array by one element; n == 0 returns a copy.
        """
        out = x.copy()
        for _ in range(n):
            out = np.diff(out)
        return out

    def _shift(self, x: np.ndarray, n: int) -> np.ndarray:
        """Shift `x` forward by `n` steps, padding the front with NaN.

        fix: guard n == 0 -- the original `x[:-0]` is an empty slice and
        raised a broadcast error instead of being a no-op.
        """
        if n == 0:
            return x.copy()
        out = np.full(x.shape, np.nan)
        out[n:] = x[:-n]
        return out

    def _reverse_shift(self, x: np.ndarray) -> np.ndarray:
        # inverse of `_shift`, needed by `predict`; not written yet
        raise NotImplementedError

    def fit(self, x: np.ndarray, loss: str = 'L1'):
        """
        Fit an auto-regressive model to the data.

        Parameters
        ----------
        x: numpy.ndarray
            The predictor variable
        loss: str; default = 'L1'
            The loss function to use for the regression.
            Options include: {"L1", "L2", "Huber"}
        """
        # FIXME - implement functionality for loss
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        # shift by the specified lags to produce our target
        _y = self._shift(_x, self.n_lags)
        # clip both and reshape for sklearn
        # NOTE(review): only the first `n_lags` entries of `_y` are NaN
        # after the shift -- clipping `n_diffs + n_lags` discards extra
        # valid rows. Left as-is pending the AR(p) rewrite; confirm.
        _x = _x[self.n_diffs + self.n_lags:].reshape(-1, 1)
        _y = _y[self.n_diffs + self.n_lags:].reshape(-1, 1)
        # apply least squares
        self._lr = LinearRegression()
        self._lr.fit(_x, _y)
        # save the fitted vars
        self._x = _x
        self._y = _y
        self._fit = True

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Forecast from new data (unfinished: needs un-differencing)."""
        # need to transform the input data first
        _x = self._validate_input(x)
        # begin by performing differencing of the data
        _x = self._difference(_x, self.n_diffs)
        raise NotImplementedError

    def plot_diagnostic(self):
        """Scatter the lagged pairs with the fitted regression line.

        Returns the matplotlib Axes. Requires `fit` to have been run.
        """
        if not self._fit:
            raise ValueError('Cannot plot diagnostics. Run `self.fit()` first.')
        fig, ax = plt.subplots(figsize=(8, 5))
        # fix: upper bound was `self._y.max()` -- the line should span
        # the range of the x-axis variable, not mix x-min with y-max
        x_hat = np.linspace(self._x.min(), self._x.max(), num=1000).reshape(-1, 1)
        y_hat = self._lr.predict(x_hat)
        score = self._lr.score(self._x, self._y)
        ax.scatter(self._x, self._y, color='black', alpha=0.25)
        ax.plot(x_hat, y_hat, label='Least Squares')
        ax.set_ylabel(f'Order Difference: {self.n_diffs} - {self.n_lags} Lags')
        ax.set_xlabel(f'Order Difference: {self.n_diffs}')
        ax.set_title(f'$R^2 = {score:.4f}$')
        ax.legend()
        # fix: original called `sns.despine()` but seaborn was never
        # imported (NameError); hide the top/right spines directly
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)
        return ax
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment