Skip to content

Instantly share code, notes, and snippets.

@ngupta23
Last active November 21, 2021 16:08
Show Gist options
  • Save ngupta23/d876b55fdfc47e5573b209d3aa5be222 to your computer and use it in GitHub Desktop.
Save ngupta23/d876b55fdfc47e5573b209d3aa5be222 to your computer and use it in GitHub Desktop.
pycaret_ts_sktime_darts.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "pycaret_ts_sktime_darts.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ngupta23/d876b55fdfc47e5573b209d3aa5be222/pycaret_ts_sktime_darts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GWDARV7nIEJ9",
"outputId": "a7e3f6e0-2054-48d8-fb64-81fda1bb09a6"
},
"source": [
"# Install Darts only when it cannot be imported in this kernel.\n",
"# Catch ImportError specifically: a bare `except` would also hide unrelated\n",
"# failures (and Ctrl-C) behind a silent reinstall.\n",
"try:\n",
"    import darts\n",
"except ImportError:\n",
"    # %pip (rather than !pip) installs into the environment of the running kernel\n",
"    %pip install darts"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n",
" defaults = yaml.load(f)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "iiHG42TGJf6K"
},
"source": [
"# Install the PyCaret time-series alpha only when pycaret cannot be imported.\n",
"# Catch ImportError specifically: a bare `except` would also hide unrelated\n",
"# failures (and Ctrl-C) behind a silent reinstall.\n",
"try:\n",
"    import pycaret\n",
"except ImportError:\n",
"    # %pip (rather than !pip) installs into the environment of the running kernel\n",
"    %pip install pycaret-ts-alpha"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "SKPONey9IF3b"
},
"source": [
"import sys\n",
"import time\n",
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"from numpy.random import RandomState\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime\n",
"from functools import reduce\n",
"from typing import Union, List, Optional, Dict\n",
"\n",
"from darts import TimeSeries\n",
"from darts.models import NaiveSeasonal\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"import logging\n",
"logging.disable(logging.CRITICAL)"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "WKAYfuvAUuqp"
},
"source": [
"## DARTS Adapter"
]
},
{
"cell_type": "code",
"metadata": {
"id": "b28MXBD0JAH4"
},
"source": [
"from sktime.forecasting.base import BaseForecaster\n",
"\n",
"class _DartsAdapter(BaseForecaster):\n",
"    \"\"\"Base class for interfacing Darts forecasting models with sktime.\n",
"\n",
"    Subclasses implement ``_instantiate_model`` and return an unfitted Darts\n",
"    model. This class converts the training data to a Darts ``TimeSeries``\n",
"    and delegates fitting and forecasting to that model.\n",
"    \"\"\"\n",
"    # https://github.com/alan-turing-institute/sktime/blob/v0.8.0/extension_templates/forecasting.py\n",
"    _tags = {\n",
"        \"scitype:y\": \"univariate\",  # which y are fine? univariate/multivariate/both\n",
"        \"univariate-only\": True,  # exogenous X is not used by this adapter\n",
"        \"handles-missing-data\": False,  # can estimator handle missing data?\n",
"        \"y_inner_mtype\": \"pd.DataFrame\",  # which types do _fit, _predict, assume for y?\n",
"        \"X_inner_mtype\": \"pd.DataFrame\",  # which types do _fit, _predict, assume for X?\n",
"        \"requires-fh-in-fit\": False,  # is forecasting horizon already required in fit?\n",
"        \"X-y-must-have-same-index\": True,  # can estimator handle different X/y index?\n",
"        \"enforce-index-type\": None,  # index type that needs to be enforced in X/y\n",
"        \"capability:pred_int\": False,\n",
"    }\n",
"\n",
"    _fitted_param_names = ()\n",
"\n",
"    def __init__(self):\n",
"        self._forecaster = None\n",
"        self._fitted_forecaster = None\n",
"        super(_DartsAdapter, self).__init__()\n",
"\n",
"    def _instantiate_model(self):\n",
"        \"\"\"Return a new, unfitted Darts model; must be provided by subclasses.\"\"\"\n",
"        raise NotImplementedError(\"Subclasses must implement _instantiate_model().\")\n",
"\n",
"    def _fit(self, y, X=None, fh=None):\n",
"        \"\"\"Fit to training data.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        y : pd.Series or pd.DataFrame\n",
"            Target time series to which to fit the forecaster.\n",
"        X : pd.DataFrame, optional (default=None)\n",
"            Exogenous variables are ignored\n",
"        fh : int, list or np.array, optional (default=None)\n",
"            The forecasting horizon with the steps ahead to predict.\n",
"            Not used while fitting.\n",
"\n",
"        Returns\n",
"        -------\n",
"        self : returns an instance of self.\n",
"        \"\"\"\n",
"        self._forecaster = self._instantiate_model()\n",
"\n",
"        # import inside method to avoid hard dependency\n",
"        from darts import TimeSeries\n",
"\n",
"        # Work on a copy so the index of the object handed in by the caller\n",
"        # is not overwritten in place.\n",
"        y_frame = pd.DataFrame(y).copy()\n",
"        # Darts needs a datetime index (originally passed as PeriodIndex)\n",
"        y_frame.index = y_frame.index.astype('datetime64[ns]')\n",
"        self._forecaster.fit(TimeSeries.from_dataframe(y_frame))\n",
"\n",
"        # this should happen last\n",
"        self._is_fitted = True\n",
"\n",
"        return self\n",
"\n",
"    def _predict(self, fh=None, X=None, return_pred_int=False, alpha=0.05):\n",
"        \"\"\"Forecast the requested out-of-sample steps with the fitted Darts model.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        fh : ForecastingHorizon or iterable of int\n",
"            Steps ahead to forecast; every step must be >= 1.\n",
"        X : pd.DataFrame, optional (default=None)\n",
"            Exogenous variables are ignored\n",
"        return_pred_int, alpha\n",
"            Accepted for interface compatibility; not used.\n",
"\n",
"        Returns\n",
"        -------\n",
"        pd.DataFrame with one row per requested step, indexed by the\n",
"        timestamps produced by Darts.\n",
"\n",
"        Raises\n",
"        ------\n",
"        NotImplementedError\n",
"            If an in-sample step (< 1) is requested.\n",
"        \"\"\"\n",
"        self.check_is_fitted()\n",
"\n",
"        # Resolve the horizon to integer steps ahead of the training cutoff.\n",
"        if hasattr(fh, \"to_relative\"):\n",
"            steps = fh.to_relative(self.cutoff).to_numpy()\n",
"        else:\n",
"            steps = list(fh)\n",
"        steps = np.asarray(steps, dtype=int)\n",
"        if (steps < 1).any():\n",
"            raise NotImplementedError(\"In-sample forecasts (fh < 1) are not supported.\")\n",
"\n",
"        # Darts forecasts one contiguous block of n steps, so forecast up to the\n",
"        # furthest requested step and keep only the requested rows. For a\n",
"        # gap-free horizon 1..h this returns exactly the h forecast rows.\n",
"        y_pred = self._forecaster.predict(int(steps.max())).pd_dataframe()\n",
"        return y_pred.iloc[steps - 1]"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wiO42sPqLCry"
},
"source": [
"class Naive(_DartsAdapter):\n",
"    \"\"\"sktime-style wrapper around the Darts ``NaiveSeasonal`` model.\"\"\"\n",
"\n",
"    def __init__(self, K=1):\n",
"        # Keep the argument under its own name before the base class initialises\n",
"        self.K = K\n",
"        super().__init__()\n",
"\n",
"    def _instantiate_model(self):\n",
"        \"\"\"Return an unfitted Darts ``NaiveSeasonal`` built with ``self.K``.\"\"\"\n",
"        # import inside method to avoid hard dependency\n",
"        from darts.models import NaiveSeasonal as _DartsNaiveSeasonal\n",
"\n",
"        darts_model = _DartsNaiveSeasonal(K=self.K)\n",
"        return darts_model\n"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fufH9Lca8oTy"
},
"source": [
"from darts.models import NBEATSModel\n",
"NBEATSModel?"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "IfY7s7Xxz-Fo"
},
"source": [
"# Get Parameters from here:\n",
"# https://github.com/unit8co/darts/blob/dec5f8c56af92254fdb1d1079f3e9d95b8c91f2b/darts/models/forecasting/nbeats.py#L411\n",
"\n",
"class NBEATSModel(_DartsAdapter):\n",
"    \"\"\"sktime-style wrapper around the Darts ``NBEATSModel``.\n",
"\n",
"    The constructor only stores its arguments; the Darts model itself is\n",
"    built from them by ``_instantiate_model``.\n",
"    \"\"\"\n",
"    # Class-level imports: these names are needed while the ``__init__``\n",
"    # signature below (annotations and defaults) is evaluated.\n",
"    import torch\n",
"    import torch.nn as nn\n",
"    from darts.utils.likelihood_models import Likelihood\n",
"\n",
"    def __init__(\n",
"        self,\n",
"        input_chunk_length: int,\n",
"        output_chunk_length: int,\n",
"        generic_architecture: bool = True,\n",
"        num_stacks: int = 30,\n",
"        num_blocks: int = 1,\n",
"        num_layers: int = 4,\n",
"        layer_widths: Union[int, List[int]] = 256,\n",
"\n",
"        expansion_coefficient_dim: int = 5,\n",
"        trend_polynomial_degree: int = 2,\n",
"        likelihood: Optional[Likelihood] = None,\n",
"        random_state: Optional[Union[int, RandomState]] = None,\n",
"\n",
"        batch_size: int = 32,\n",
"        n_epochs: int = 100,\n",
"        optimizer_cls: torch.optim.Optimizer = torch.optim.Adam,\n",
"        optimizer_kwargs: Optional[Dict] = None,\n",
"        lr_scheduler_cls: torch.optim.lr_scheduler._LRScheduler = None,\n",
"        lr_scheduler_kwargs: Optional[Dict] = None,\n",
"        loss_fn: nn.modules.loss._Loss = nn.MSELoss(),\n",
"        model_name: str = None,\n",
"        work_dir: str = os.getcwd(),\n",
"        log_tensorboard: bool = False,\n",
"        nr_epochs_val_period: int = 10,\n",
"        torch_device_str: Optional[str] = None,\n",
"        force_reset=False,\n",
"        save_checkpoints=False\n",
"    ):\n",
"        # Architecture\n",
"        self.input_chunk_length = input_chunk_length\n",
"        self.output_chunk_length = output_chunk_length\n",
"        self.generic_architecture = generic_architecture\n",
"        self.num_stacks = num_stacks\n",
"        self.num_blocks = num_blocks\n",
"        self.num_layers = num_layers\n",
"        self.layer_widths = layer_widths\n",
"\n",
"        self.expansion_coefficient_dim = expansion_coefficient_dim\n",
"        self.trend_polynomial_degree = trend_polynomial_degree\n",
"        self.likelihood = likelihood\n",
"        self.random_state = random_state\n",
"\n",
"        # Training / bookkeeping\n",
"        self.batch_size = batch_size\n",
"        self.n_epochs = n_epochs\n",
"        self.optimizer_cls = optimizer_cls\n",
"        self.optimizer_kwargs = optimizer_kwargs\n",
"        self.lr_scheduler_cls = lr_scheduler_cls\n",
"        self.lr_scheduler_kwargs = lr_scheduler_kwargs\n",
"        self.loss_fn = loss_fn\n",
"        self.model_name = model_name\n",
"        self.work_dir = work_dir\n",
"        self.log_tensorboard = log_tensorboard\n",
"        self.nr_epochs_val_period = nr_epochs_val_period\n",
"        self.torch_device_str = torch_device_str\n",
"        self.force_reset = force_reset\n",
"        self.save_checkpoints = save_checkpoints\n",
"\n",
"        super(NBEATSModel, self).__init__()\n",
"\n",
"    def _instantiate_model(self):\n",
"        \"\"\"Build the underlying Darts N-BEATS model from the stored parameters.\n",
"\n",
"        Every constructor argument is forwarded. Arguments that default to\n",
"        ``None`` (except ``model_name``, which is always passed) are only\n",
"        forwarded when they were set, so Darts applies its own defaults\n",
"        for the rest.\n",
"        \"\"\"\n",
"        # import inside method to avoid hard dependency\n",
"        from darts.models import NBEATSModel as _NBEATSModel\n",
"\n",
"        model_kwargs = dict(\n",
"            input_chunk_length=self.input_chunk_length,\n",
"            output_chunk_length=self.output_chunk_length,\n",
"            generic_architecture=self.generic_architecture,\n",
"            num_stacks=self.num_stacks,\n",
"            num_blocks=self.num_blocks,\n",
"            num_layers=self.num_layers,\n",
"            layer_widths=self.layer_widths,\n",
"            expansion_coefficient_dim=self.expansion_coefficient_dim,\n",
"            trend_polynomial_degree=self.trend_polynomial_degree,\n",
"            batch_size=self.batch_size,\n",
"            n_epochs=self.n_epochs,\n",
"            optimizer_cls=self.optimizer_cls,\n",
"            loss_fn=self.loss_fn,\n",
"            model_name=self.model_name,\n",
"            work_dir=self.work_dir,\n",
"            log_tensorboard=self.log_tensorboard,\n",
"            nr_epochs_val_period=self.nr_epochs_val_period,\n",
"            force_reset=self.force_reset,\n",
"            save_checkpoints=self.save_checkpoints,\n",
"        )\n",
"\n",
"        # None means \"not set\": leave these out so the Darts defaults apply.\n",
"        optional_kwargs = dict(\n",
"            likelihood=self.likelihood,\n",
"            random_state=self.random_state,\n",
"            optimizer_kwargs=self.optimizer_kwargs,\n",
"            lr_scheduler_cls=self.lr_scheduler_cls,\n",
"            lr_scheduler_kwargs=self.lr_scheduler_kwargs,\n",
"            torch_device_str=self.torch_device_str,\n",
"        )\n",
"        model_kwargs.update(\n",
"            {name: value for name, value in optional_kwargs.items() if value is not None}\n",
"        )\n",
"\n",
"        return _NBEATSModel(**model_kwargs)\n",
"\n"
"\n"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "L15ogZhnUqkI"
},
"source": [
"## Get Dataset"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 384
},
"id": "upb8xabYLvwr",
"outputId": "794c07f3-bbe0-4000-9cf1-4836dc079735"
},
"source": [
"from pycaret.datasets import get_data\n",
"y = get_data(\"airline\")\n",
"train = y[:-36]\n",
"test = y[-36:]\n",
"train.index, test.index"
],
"execution_count": 8,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Period\n",
"1949-01 112.0\n",
"1949-02 118.0\n",
"1949-03 132.0\n",
"1949-04 129.0\n",
"1949-05 121.0\n",
"Freq: M, Name: Number of airline passengers, dtype: float64"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(PeriodIndex(['1949-01', '1949-02', '1949-03', '1949-04', '1949-05', '1949-06',\n",
" '1949-07', '1949-08', '1949-09', '1949-10',\n",
" ...\n",
" '1957-03', '1957-04', '1957-05', '1957-06', '1957-07', '1957-08',\n",
" '1957-09', '1957-10', '1957-11', '1957-12'],\n",
" dtype='period[M]', name='Period', length=108),\n",
" PeriodIndex(['1958-01', '1958-02', '1958-03', '1958-04', '1958-05', '1958-06',\n",
" '1958-07', '1958-08', '1958-09', '1958-10', '1958-11', '1958-12',\n",
" '1959-01', '1959-02', '1959-03', '1959-04', '1959-05', '1959-06',\n",
" '1959-07', '1959-08', '1959-09', '1959-10', '1959-11', '1959-12',\n",
" '1960-01', '1960-02', '1960-03', '1960-04', '1960-05', '1960-06',\n",
" '1960-07', '1960-08', '1960-09', '1960-10', '1960-11', '1960-12'],\n",
" dtype='period[M]', name='Period'))"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1zJtsxLnUzM4"
},
"source": [
"## Naive Model"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZIKQdO-oLxWj",
"outputId": "6b18aff7-6322-4ccd-f0fe-e0b428ee30a4"
},
"source": [
"model1 = Naive()\n",
"model1.fit(train)\n",
"\n",
"# #### Prediction works with underlying DARTS model ----\n",
"# model1._forecaster.predict(36).values().T\n",
"\n",
"#### Prediction works with sktime adapter now ----\n",
"predictions = model1.predict(fh=np.arange(1, 37))\n",
"type(predictions), predictions.shape"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(pandas.core.series.Series, (36,))"
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ziVKlX2N0wCZ"
},
"source": [
"### N-BEATS Model"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8C2z9PX5xhIg",
"outputId": "5512f858-cc78-421d-8f92-4b906484bcc7"
},
"source": [
"model2a = NBEATSModel(\n",
" input_chunk_length=30,\n",
" output_chunk_length=7,\n",
" generic_architecture=True,\n",
" num_stacks=10,\n",
" num_blocks=1,\n",
" num_layers=4,\n",
" layer_widths=512,\n",
" n_epochs=100,\n",
" nr_epochs_val_period=1,\n",
" batch_size=800,\n",
" model_name='nbeats_run'\n",
")\n",
"model2a.fit(train)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"NBEATSModel(batch_size=800, input_chunk_length=30, layer_widths=512,\n",
" model_name='nbeats_run', nr_epochs_val_period=1, num_stacks=10,\n",
" output_chunk_length=7)"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RT5uRc85KIpY",
"outputId": "cbe22763-f6aa-42ff-f0f2-174d71d7e9c8"
},
"source": [
"predictions = model2a.predict(fh=np.arange(1, 37))\n",
"type(predictions), predictions.shape"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(pandas.core.series.Series, (36,))"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jEdQkXqLKsHI",
"outputId": "f1faa607-47f9-4d1b-edcc-0da9824432a0"
},
"source": [
"model2b = NBEATSModel(\n",
" input_chunk_length=30,\n",
" output_chunk_length=7,\n",
" generic_architecture=False,\n",
" num_blocks=3,\n",
" num_layers=4,\n",
" layer_widths=512,\n",
" n_epochs=100,\n",
" nr_epochs_val_period=1,\n",
" batch_size=800,\n",
" model_name='nbeats_interpretable_run'\n",
")\n",
"model2b.fit(train)"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"NBEATSModel(batch_size=800, generic_architecture=False, input_chunk_length=30,\n",
" layer_widths=512, model_name='nbeats_interpretable_run',\n",
" nr_epochs_val_period=1, num_blocks=3, output_chunk_length=7)"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "bpaD93CcQcMG",
"outputId": "c6e14acd-64c1-4300-9a9d-64bdb7d0a9fd",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"source": [
"predictions = model2b.predict(fh=np.arange(1, 37))\n",
"type(predictions), predictions.shape"
],
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(pandas.core.series.Series, (36,))"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vOjJ9dTUUQoI"
},
"source": [
"## Backup"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LSgxwiE-QREc",
"outputId": "0ac99de1-57ac-42e0-8c5a-8e9bb634e487",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 117
}
},
"source": [
"sys.exit()"
],
"execution_count": 13,
"outputs": [
{
"output_type": "error",
"ename": "SystemExit",
"evalue": "ignored",
"traceback": [
"An exception has occurred, use %tb to see the full traceback.\n",
"\u001b[0;31mSystemExit\u001b[0m\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "zwH88xAX66r-"
},
"source": [
"from darts import TimeSeries\n",
"from darts.models import RNNModel\n",
"from darts.models import NBEATSModel"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "uo0vEd9BURfc"
},
"source": [
"from darts.metrics import mape, mase\n",
"from darts.utils.statistics import check_seasonality, plot_acf, plot_residuals_analysis, plot_hist\n",
"from darts.datasets import AirPassengersDataset"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wq5dULjXPnt-"
},
"source": [
"series = AirPassengersDataset().load()\n",
"series.plot()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "vqRcfIEwUVXr"
},
"source": [
"train, val = series.split_before(pd.Timestamp('19580101'))\n",
"train.plot(label='training')\n",
"val.plot(label='validation')\n",
"plt.legend()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Px8t6wA0UYFc"
},
"source": [
"naive_model = NaiveSeasonal(K=1)\n",
"naive_model.fit(train)\n",
"naive_forecast = naive_model.predict(36)\n",
"\n",
"series.plot(label='actual')\n",
"naive_forecast.plot(label='naive forecast (K=1)')\n",
"plt.legend();"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "kzwtw4mP29Or"
},
"source": [
"NBEATSModel?"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "1QfYgjYj25PA"
},
"source": [
"from darts.models import NBEATSModel\n",
"model_nbeats = NBEATSModel(\n",
" input_chunk_length=30,\n",
" output_chunk_length=7,\n",
" generic_architecture=True,\n",
" # num_stacks=10,\n",
" num_blocks=1,\n",
" num_layers=4,\n",
" layer_widths=512,\n",
" n_epochs=100,\n",
" nr_epochs_val_period=1,\n",
" batch_size=800,\n",
" model_name='nbeats_run'\n",
")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "aFdN4Uqf252S"
},
"source": [
"model_nbeats.fit(train, verbose=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "lhWg3S8B7W42"
},
"source": [
"model_nbeats.predict(n=36)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "lpywP8GBLZRc"
},
"source": [
"model_nbeats = NBEATSModel(\n",
" input_chunk_length=30,\n",
" output_chunk_length=7,\n",
" generic_architecture=False,\n",
" num_blocks=3,\n",
" num_layers=4,\n",
" layer_widths=512,\n",
" n_epochs=100,\n",
" nr_epochs_val_period=1,\n",
" batch_size=800,\n",
" model_name='nbeats_interpretable_run'\n",
")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "pC8At8mKLhTg"
},
"source": [
"model_nbeats.fit(train, verbose=True)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7WBlkxHgUaqs"
},
"source": [
"class RNNModel(_DartsAdapter):\n",
"    \"\"\"sktime-style wrapper around the Darts ``RNNModel``.\n",
"\n",
"    The constructor only stores its arguments; the Darts model itself is\n",
"    built from them by ``_instantiate_model``.\n",
"    \"\"\"\n",
"\n",
"    def __init__(\n",
"        self,\n",
"        model='LSTM',\n",
"        hidden_dim=20,\n",
"        dropout=0,\n",
"        batch_size=16,\n",
"        n_epochs=300,\n",
"        optimizer_kwargs={'lr': 1e-3},\n",
"        model_name='Air_RNN',\n",
"        log_tensorboard=True,\n",
"        random_state=42,\n",
"        training_length=20,\n",
"        input_chunk_length=14,\n",
"        force_reset=True,\n",
"        save_checkpoints=True\n",
"    ):\n",
"        # No trailing commas on these assignments: `self.x = x,` stores the\n",
"        # 1-tuple `(x,)` instead of the value itself.\n",
"        self.model = model\n",
"        self.hidden_dim = hidden_dim\n",
"        self.dropout = dropout\n",
"        self.batch_size = batch_size\n",
"        self.n_epochs = n_epochs\n",
"        self.optimizer_kwargs = optimizer_kwargs\n",
"        self.model_name = model_name\n",
"        self.log_tensorboard = log_tensorboard\n",
"        self.random_state = random_state\n",
"        self.training_length = training_length\n",
"        self.input_chunk_length = input_chunk_length\n",
"        self.force_reset = force_reset\n",
"        self.save_checkpoints = save_checkpoints\n",
"\n",
"        super(RNNModel, self).__init__()\n",
"\n",
"    def _instantiate_model(self):\n",
"        \"\"\"Build the underlying Darts RNN model from the stored parameters.\"\"\"\n",
"        # import inside method to avoid hard dependency\n",
"        from darts.models import RNNModel as _RNNModel\n",
"\n",
"        # Pass a copy so the default dict shared across instances is never\n",
"        # handed to (and possibly modified by) Darts.\n",
"        optimizer_kwargs = (\n",
"            None if self.optimizer_kwargs is None else dict(self.optimizer_kwargs)\n",
"        )\n",
"\n",
"        return _RNNModel(\n",
"            model=self.model,\n",
"            hidden_dim=self.hidden_dim,\n",
"            dropout=self.dropout,\n",
"            batch_size=self.batch_size,\n",
"            n_epochs=self.n_epochs,\n",
"            optimizer_kwargs=optimizer_kwargs,\n",
"            model_name=self.model_name,\n",
"            log_tensorboard=self.log_tensorboard,\n",
"            random_state=self.random_state,\n",
"            training_length=self.training_length,\n",
"            input_chunk_length=self.input_chunk_length,\n",
"            force_reset=self.force_reset,\n",
"            save_checkpoints=self.save_checkpoints\n",
"        )"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Br19Z6Vg-Ito"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment