Last active
November 28, 2018 12:06
-
-
Save oguiza/e04f24a82de8215a4d7c4e4de58fa90e to your computer and use it in GitHub Desktop.
UCR_Time_Series_Classification_Univariate_Datasets.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# UEA & UCR Time Series Classification Multivariate Datasets*: LSST\n\n*A. Bagnall, J. Lines, W. Vickers and E. Keogh, The UEA & UCR Time Series Classification Repository,\nwww.timeseriesclassification.com" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Import libraries" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:12.469810Z", | |
"end_time": "2018-11-28T12:05:12.707640Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%reload_ext autoreload\n%autoreload 2\n%matplotlib inline", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:12.757079Z", | |
"end_time": "2018-11-28T12:05:13.972801Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from fastai import *\nfrom fastai.vision import *\nimport fastai\nfastai.__version__", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": "'1.0.28'" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:13.976274Z", | |
"end_time": "2018-11-28T12:05:13.998554Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import warnings\nwarnings.filterwarnings(\"ignore\")", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:14.001000Z", | |
"end_time": "2018-11-28T12:05:14.069727Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from tslearn.datasets import extract_from_zip_url\nfrom scipy.io import arff", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Prepare time series data" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:15.165526Z", | |
"end_time": "2018-11-28T12:05:15.191580Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "source_dir = 'http://www.timeseriesclassification.com/Downloads/'\ntarget_dir='my_data/Downloads/'\nSEL_DATASET = 'LSST'", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:16.133809Z", | |
"end_time": "2018-11-28T12:05:19.558812Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "extract_from_zip_url(\n source_dir + SEL_DATASET + '.zip',\n target_dir=target_dir + SEL_DATASET,\n verbose=True)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Successfully extracted file /tmp/tmp6t_001ja/LSST.zip to path my_data/Downloads/LSST\n", | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"execution_count": 6, | |
"data": { | |
"text/plain": "'my_data/Downloads/LSST'" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T11:44:35.863131Z", | |
"end_time": "2018-11-28T11:44:35.885674Z" | |
} | |
}, | |
"cell_type": "markdown", | |
"source": "There are 6 time series per sample" | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:26.229914Z", | |
"end_time": "2018-11-28T12:05:26.830825Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_data_dict = {}\ntest_data_dict = {}\nfor i in range(6):\n train_data_dict[i] = pd.DataFrame(\n arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n '_TRAIN.arff')[0])\n test_data_dict[i] = pd.DataFrame(\n arff.loadarff('my_data/Downloads/LSST/LSSTDimension' + str(i + 1) +\n '_TEST.arff')[0])", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-11-28T12:05:48.990481Z", | |
"end_time": "2018-11-28T12:05:49.017873Z" | |
}, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "classes = np.sort(np.unique(train_data_dict[0].iloc[:, -1].values.astype(int)))\nnb_classes = len(classes)\nnb_classes, classes", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 9, | |
"data": { | |
"text/plain": "(14, array([ 6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]))" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/e04f24a82de8215a4d7c4e4de58fa90e" | |
}, | |
"gist": { | |
"id": "e04f24a82de8215a4d7c4e4de58fa90e", | |
"data": { | |
"description": "UCR_Time_Series_Classification_Univariate_Datasets.ipynb", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "fastai-v1", | |
"display_name": "fastai-v1", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.0", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"notify_time": "30", | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment