Skip to content

Instantly share code, notes, and snippets.

@takotab
Created November 26, 2018 12:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takotab/66ba156072b5f54addd1b64b28fa7429 to your computer and use it in GitHub Desktop.
Save takotab/66ba156072b5f54addd1b64b28fa7429 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CNN Approach to Time-series classification\n",
"\n",
"In this notebook we demonstrate a transformation-based approach to time series classification. Each time series is transformed into an image by one of several methods, and a classifier is trained directly on the image data.\n",
"\n",
"The notebook automates the preprocessing of data for any of the 128 UCR time series classification datasets. Download them here:\n",
"http://www.timeseriesclassification.com/index.php, unzip the archive, and set your working directory there to run this notebook as is."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from scipy.io import arff\n",
"import seaborn as sns\n",
"from torch.utils.data import Dataset, DataLoader, ConcatDataset\n",
"from fastai import *\n",
"from fastai.vision import * \n",
"from fastai.vision.data import ImageDataBunch\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix\n",
"from pyts.image import GASF, GADF, MTF, RecurrencePlots\n",
"import pdb\n",
"from skimage.transform import resize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.0.24'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import fastai\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"fastai.__version__"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def cleanup(df):\n",
" #df.sample(df.shape[0], replace=False).reset_index(drop=True)\n",
" df.columns = [k for k in range(df.shape[1]-1)]+['target']\n",
" for k in df.columns[:-1]:\n",
" df[k] = df[k].astype('float')\n",
" if df.target.dtype == 'object':\n",
" df['target'] = df['target'].apply(lambda x: x.decode('ascii')).astype('int')\n",
" if sorted(df.target.unique()) != list(np.arange(df.target.nunique())):\n",
" new_targs = pd.DataFrame({'target':df.target.unique()}).reset_index()\n",
" df = pd.merge(df, new_targs, left_on='target', right_on='target').drop('target',axis=1).rename(columns={'index':'target'})\n",
" ts = pd.melt(df.reset_index(), id_vars=['index','target'], var_name='time').rename(columns={'index':'id'})\n",
" ts = ts.groupby(['id','time','target']).value.mean().reset_index()\n",
" return df, ts\n",
"\n",
"def graph_ts(ts):\n",
" for k in sorted(ts.target.unique()):\n",
" fig, axes = plt.subplots(figsize=(15,5))\n",
" sns.tsplot(ts[ts.target == k], time='time', unit='id', condition='target', value='value', err_style='unit_traces', ax=axes) \n",
" fig, axes = plt.subplots(figsize=(15,5))\n",
" sns.tsplot(ts, time='time', unit='id', condition='target', value='value', err_style='unit_traces', ax=axes)\n",
" return None\n",
"\n",
"def prep_data(task='Oliveoil', cmap='rainbow', method='GASF', image_size=224, graph=False):\n",
" path = Path('TSC/%s'%(task))\n",
" for phase in ['TRAIN','TEST']:\n",
" if graph:\n",
" graph_ts(ts)\n",
" labels = df.target.unique()\n",
" if method == 'GASF':\n",
" transformer = GASF(image_size=image_size)\n",
" elif method == 'GADF':\n",
" transformer = GADF(image_size=image_size)\n",
" elif method == 'MTF':\n",
" transformer = MTF(image_size=image_size)\n",
" elif method == 'RP':\n",
" transformer = RecurrencePlots(dimension=1, epsilon=None, percentage=10)\n",
" else:\n",
" method = 'Unaltered'\n",
" method_path = path/f'{method}'\n",
" method_path.mkdir(exist_ok=True)\n",
" phase_path = method_path/f'{phase}'\n",
" phase_path.mkdir(exist_ok=True)\n",
" if method != 'Unaltered':\n",
" image_data = transformer.fit_transform(df[df.columns[:-1]])\n",
" image_data = resize(image_data, (image_data.shape[0],image_size,image_size))\n",
" for label in labels:\n",
" label_path = phase_path/f'{label}'\n",
" label_path.mkdir(exist_ok=True)\n",
" sub_df = df[df.target == label]\n",
" idxs = sub_df.index\n",
" for idx in idxs:\n",
" plt.imsave(label_path/f'{idx}.png', image_data[idx], cmap=cmap)\n",
" else: \n",
" for label in labels:\n",
" label_path = phase_path/f'{label}'\n",
" label_path.mkdir(exist_ok=True)\n",
" sub_df = df[df.target == label]\n",
" idxs = sub_df.index\n",
" for idx in idxs:\n",
" plt.figure(figsize=(10,10))\n",
" plt.plot(df.iloc[idx].values)\n",
" plt.savefig(label_path/f'{idx}')\n",
" plt.close('all') \n",
" return None\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"task='Earthquakes'\n",
"method = 'RP'"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"prep_data(task=task, method=method, image_size=128, graph=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"phase = 'TRAIN' \n",
"data = arff.loadarff('TSC/%s_%s.arff'%(task,phase))\n",
"df = pd.DataFrame(data[0])\n",
"df, ts = cleanup(df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(322, 513)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"method = 'MTF'"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([48, 3, 128, 128])"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x,y = next(iter(data.valid_dl)) ; x.size()"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"path = Path('TSC/Earthquakes/RP/TEST/0')"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"264"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(path.ls())"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"path = Path('TSC/%s/%s'%(task,method))\n",
"data = (ImageItemList.from_folder(path) \n",
" .split_by_folder(train='TRAIN', valid='TEST')\n",
" .label_from_folder()\n",
" .transform().databunch(bs=32))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"cnn = create_cnn(data, models.resnet34, metrics=accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"cnn.lr_find(); cnn.recorder.plot()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total time: 00:06\n",
"epoch train_loss valid_loss accuracy\n",
"1 0.785134 0.651331 0.798137 (00:01)\n",
"2 0.736826 0.673580 0.807453 (00:01)\n",
"3 0.682513 0.878728 0.785714 (00:01)\n",
"4 0.634076 0.483826 0.835404 (00:01)\n",
"5 0.602548 0.438748 0.838509 (00:01)\n",
"\n"
]
}
],
"source": [
"cnn.fit_one_cycle(5, max_lr=2e-3)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"cnn.unfreeze() ; cnn.lr_find() ; cnn.recorder.plot()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total time: 00:13\n",
"epoch train_loss valid_loss accuracy\n",
"1 0.475293 0.405666 0.854037 (00:01)\n",
"2 0.471815 0.400623 0.857143 (00:01)\n",
"3 0.444360 0.483583 0.860248 (00:01)\n",
"4 0.455074 0.437068 0.869565 (00:01)\n",
"5 0.457793 0.810575 0.854037 (00:01)\n",
"6 0.495422 0.867597 0.854037 (00:01)\n",
"7 0.490892 0.682565 0.866460 (00:01)\n",
"8 0.492190 0.413161 0.863354 (00:01)\n",
"9 0.504963 0.637783 0.850932 (00:01)\n",
"10 0.517931 0.550094 0.850932 (00:01)\n",
"\n"
]
}
],
"source": [
"cnn.fit_one_cycle(10, max_lr = 5e-5)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"class stacked_resnet_cores(nn.Module):\n",
" def __init__(self, dummy_databunch, num_images):\n",
" super().__init__()\n",
" self.resnets = [list(create_cnn(dummy_databunch, models.resnet34).model.children())[0] for _ in range(num_images)]\n",
" self.resnets = torch.nn.ModuleList(self.resnets)\n",
" self.pool = AdaptiveConcatPool2d()\n",
" \n",
" def forward(self, x):\n",
" outputs = []\n",
" for k,j in enumerate(self.resnets):\n",
" output = self.resnets[k](x[:,k*3:(k+1)*3,:,:])\n",
" outputs.append(output)\n",
" outputs = [self.pool(output) for output in outputs]\n",
" output = torch.cat(outputs, dim=1).squeeze()\n",
" return output"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"class multi_image_classifier(nn.Module):\n",
" def __init__(self, dummy_databunch, num_images, lin_ftrs, nc, ps=0.2):\n",
" super().__init__()\n",
" self.cores = stacked_resnet_cores(dummy_databunch, num_images)\n",
" nf = 1024 * num_images\n",
" lin_ftrs = [nf, 512, nc] if lin_ftrs is None else [nf] + lin_ftrs + [nc]\n",
" ps = listify(ps)\n",
" if len(ps)==1: ps = [ps[0]/2] * (len(lin_ftrs)-2) + ps\n",
" actns = [nn.ReLU(inplace=True)] * (len(lin_ftrs)-2) + [None]\n",
" self.layers = []\n",
" for ni,no,p,actn in zip(lin_ftrs[:-1],lin_ftrs[1:],ps,actns):\n",
" self.layers += bn_drop_lin(ni,no,True,p,actn)\n",
" self.layers = nn.ModuleList(self.layers)\n",
" self.head = nn.Sequential(self.layers)\n",
" \n",
" def forward(self, x):\n",
" return self.head(self.cores(x))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"class multi_image_dataset(Dataset):\n",
" def __init__(self, paths, train=False, valid=False):\n",
" self.x = []\n",
" if train:\n",
" for path in paths:\n",
" data = (ImageItemList.from_folder(path) \n",
" .split_by_folder(train='TRAIN', valid='TEST')\n",
" .label_from_folder()\n",
" ).train\n",
" out = torch.cat([data[k][0].data.unsqueeze(-1) for k in range(len(data))], dim=-1)\n",
" self.x.append(out)\n",
" elif valid:\n",
" for path in paths:\n",
" data = (ImageItemList.from_folder(path) \n",
" .split_by_folder(train='TRAIN', valid='TEST')\n",
" .label_from_folder()\n",
" ).valid\n",
" out = torch.cat([data[k][0].data.unsqueeze(-1) for k in range(len(data))], dim=-1)\n",
" self.x.append(out)\n",
" self.x = torch.cat(self.x, dim=0)\n",
" self.y = data.y\n",
" self.length = out.size(-1)\n",
" \n",
" def __len__(self):\n",
" return self.length\n",
" \n",
" \n",
" def __getitem__(self, idx):\n",
" return self.x[:,:,:,idx], torch.Tensor([int(self.y[idx].cat)]).long().squeeze()"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"methods = ['GADF','RP','MTF']\n",
"#for method in methods:\n",
"# prep_data(task=task, method=method, image_size=224)\n",
"paths = [Path('TSC/%s/%s'%(task,method)) for method in methods]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"tr_ds = multi_image_dataset(paths, train=True)\n",
"val_ds = multi_image_dataset(paths, valid=True)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"tr_dl = DataLoader(tr_ds, batch_size = 16, shuffle=True)\n",
"val_dl = DataLoader(val_ds, batch_size = 16, shuffle=True)\n",
"md = DataBunch(tr_dl, val_dl)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"crit = nn.CrossEntropyLoss()\n",
"arch = multi_image_classifier_2(dummy_databunch=data, num_images=2, lin_ftrs=None, nc=4, ps=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"learn = Learner(md, arch, loss_func = crit, metrics=accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"learn.lr_find() ; learn.recorder.plot()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total time: 00:11\n",
"epoch train_loss valid_loss accuracy\n",
"1 0.365040 0.265801 0.878882 (00:02)\n",
"2 0.360374 0.226251 0.928571 (00:02)\n",
"3 0.347392 0.239372 0.869565 (00:02)\n",
"4 0.329797 0.171565 0.931677 (00:02)\n",
"5 0.331179 0.175459 0.962733 (00:02)\n",
"\n"
]
}
],
"source": [
"learn.fit_one_cycle(5, max_lr=1e-4)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"class multi_image_classifier_2(nn.Module):\n",
" def __init__(self, dummy_databunch, num_images, lin_ftrs, nc, ps=0.2):\n",
" super().__init__()\n",
" self.cores = stacked_resnet_cores(dummy_databunch, num_images)\n",
" nf = 1024 * num_images\n",
" self.lin1 = nn.Linear(nf,512)\n",
" self.lin2 = nn.Linear(512,4)\n",
" self.dp1 = nn.Dropout(0.2)\n",
" self.dp2 = nn.Dropout(0.2)\n",
" \n",
" def forward(self,x):\n",
" x = self.cores(x)\n",
" x = self.lin2(self.dp2(self.lin1(self.dp1(x))))\n",
" return x\n",
" \n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment