oguiza/Multivariate Time Series Classification - LSST.ipynb

## Multivariate Time Series Classification - LSST.ipynb
{
  "cells": [
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# UEA & UCR Time Series Classification Multivariate Datasets*: LSST\n\n*A. Bagnall, J. Lines, W. Vickers and E. Keogh, The UEA & UCR Time Series Classification Repository,\nwww.timeseriesclassification.com"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Import libraries"
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:12.469810Z",
          "end_time": "2018-11-28T12:05:12.707640Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "%reload_ext autoreload\n%autoreload 2\n%matplotlib inline",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:12.757079Z",
          "end_time": "2018-11-28T12:05:13.972801Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "from fastai import *\nfrom fastai.vision import *\nimport fastai\nfastai.__version__",
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "'1.0.28'"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:13.976274Z",
          "end_time": "2018-11-28T12:05:13.998554Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "import warnings\nwarnings.filterwarnings(\"ignore\")",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:14.001000Z",
          "end_time": "2018-11-28T12:05:14.069727Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "from tslearn.datasets import extract_from_zip_url\nfrom scipy.io import arff",
      "execution_count": 4,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Prepare time series data"
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:15.165526Z",
          "end_time": "2018-11-28T12:05:15.191580Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "source_dir = 'http://www.timeseriesclassification.com/Downloads/'\ntarget_dir='my_data/Downloads/'\nSEL_DATASET = 'LSST'",
      "execution_count": 5,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:16.133809Z",
          "end_time": "2018-11-28T12:05:19.558812Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "extract_from_zip_url(\n    source_dir + SEL_DATASET + '.zip',\n    target_dir=target_dir + SEL_DATASET,\n    verbose=True)",
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Successfully extracted file /tmp/tmp6t_001ja/LSST.zip to path my_data/Downloads/LSST\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 6,
          "data": {
            "text/plain": "'my_data/Downloads/LSST'"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T11:44:35.863131Z",
          "end_time": "2018-11-28T11:44:35.885674Z"
        }
      },
      "cell_type": "markdown",
      "source": "There are 6 time series (dimensions) per sample; each dimension is stored in its own pair of TRAIN/TEST ARFF files."
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:26.229914Z",
          "end_time": "2018-11-28T12:05:26.830825Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "train_data_dict = {}\ntest_data_dict = {}\nfor i in range(6):\n    train_data_dict[i] = pd.DataFrame(\n        arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n                      '_TRAIN.arff')[0])\n    test_data_dict[i] = pd.DataFrame(\n        arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n                      '_TEST.arff')[0])",
      "execution_count": 7,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-11-28T12:05:48.990481Z",
          "end_time": "2018-11-28T12:05:49.017873Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "classes = np.sort(np.unique(train_data_dict[0].iloc[:, -1].values.astype(int)))\nnb_classes = len(classes)\nnb_classes, classes",
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "(14, array([ 6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]))"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "_draft": {
      "nbviewer_url": "https://gist.github.com/e04f24a82de8215a4d7c4e4de58fa90e"
    },
    "gist": {
      "id": "e04f24a82de8215a4d7c4e4de58fa90e",
      "data": {
        "description": "UCR_Time_Series_Classification_Univariate_Datasets.ipynb",
        "public": true
      }
    },
    "kernelspec": {
      "name": "fastai-v1",
      "display_name": "fastai-v1",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.7.0",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "notify_time": "30",
    "toc": {
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": false,
      "base_numbering": 1,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": false
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# UEA & UCR Time Series Classification Multivariate Datasets*: LSST\n\n*A. Bagnall, J. Lines, W. Vickers and E. Keogh, The UEA & UCR Time Series Classification Repository,\nwww.timeseriesclassification.com"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Import libraries"
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:12.469810Z",
	"end_time": "2018-11-28T12:05:12.707640Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "%reload_ext autoreload\n%autoreload 2\n%matplotlib inline",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:12.757079Z",
	"end_time": "2018-11-28T12:05:13.972801Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "from fastai import *\nfrom fastai.vision import *\nimport fastai\nfastai.__version__",
	"execution_count": 2,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 2,
	"data": {
	"text/plain": "'1.0.28'"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:13.976274Z",
	"end_time": "2018-11-28T12:05:13.998554Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "import warnings\nwarnings.filterwarnings(\"ignore\")",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:14.001000Z",
	"end_time": "2018-11-28T12:05:14.069727Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "from tslearn.datasets import extract_from_zip_url\nfrom scipy.io import arff",
	"execution_count": 4,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Prepare time series data"
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:15.165526Z",
	"end_time": "2018-11-28T12:05:15.191580Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "source_dir = 'http://www.timeseriesclassification.com/Downloads/'\ntarget_dir='my_data/Downloads/'\nSEL_DATASET = 'LSST'",
	"execution_count": 5,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:16.133809Z",
	"end_time": "2018-11-28T12:05:19.558812Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "extract_from_zip_url(\n    source_dir + SEL_DATASET + '.zip',\n    target_dir=target_dir + SEL_DATASET,\n    verbose=True)",
	"execution_count": 6,
	"outputs": [
	{
	"output_type": "stream",
	"text": "Successfully extracted file /tmp/tmp6t_001ja/LSST.zip to path my_data/Downloads/LSST\n",
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"execution_count": 6,
	"data": {
	"text/plain": "'my_data/Downloads/LSST'"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T11:44:35.863131Z",
	"end_time": "2018-11-28T11:44:35.885674Z"
	}
	},
	"cell_type": "markdown",
	"source": "There are 6 time series (dimensions) per sample; each dimension is stored in its own pair of TRAIN/TEST ARFF files."
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:26.229914Z",
	"end_time": "2018-11-28T12:05:26.830825Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "train_data_dict = {}\ntest_data_dict = {}\nfor i in range(6):\n    train_data_dict[i] = pd.DataFrame(\n        arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n                      '_TRAIN.arff')[0])\n    test_data_dict[i] = pd.DataFrame(\n        arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n                      '_TEST.arff')[0])",
	"execution_count": 7,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-11-28T12:05:48.990481Z",
	"end_time": "2018-11-28T12:05:49.017873Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "classes = np.sort(np.unique(train_data_dict[0].iloc[:, -1].values.astype(int)))\nnb_classes = len(classes)\nnb_classes, classes",
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 9,
	"data": {
	"text/plain": "(14, array([ 6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"_draft": {
	"nbviewer_url": "https://gist.github.com/e04f24a82de8215a4d7c4e4de58fa90e"
	},
	"gist": {
	"id": "e04f24a82de8215a4d7c4e4de58fa90e",
	"data": {
	"description": "UCR_Time_Series_Classification_Univariate_Datasets.ipynb",
	"public": true
	}
	},
	"kernelspec": {
	"name": "fastai-v1",
	"display_name": "fastai-v1",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.7.0",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"notify_time": "30",
	"toc": {
	"nav_menu": {},
	"number_sections": true,
	"sideBar": true,
	"skip_h1_title": false,
	"base_numbering": 1,
	"title_cell": "Table of Contents",
	"title_sidebar": "Contents",
	"toc_cell": false,
	"toc_position": {},
	"toc_section_display": true,
	"toc_window_display": false
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}