Created
October 21, 2018 20:29
-
-
Save okwrtdsh/b82df846192c775b391620dbf0c572a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Using TensorFlow backend.\n" | |
] | |
} | |
], | |
"source": [ | |
"import warnings\n", | |
"warnings.simplefilter(action='ignore', category=FutureWarning)\n", | |
"import keras" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# data数\n", | |
"N_DATA = 1234\n", | |
"# 特徴数\n", | |
"N_FEATURE = 122\n", | |
"# 時刻幅\n", | |
"TIME_STEP = 100\n", | |
"# class数\n", | |
"N_CLASS = 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((1234, 100, 122), (1234, 2))" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"from keras.utils import to_categorical\n", | |
"X_train = np.random.random(size=(N_DATA, TIME_STEP, N_FEATURE))\n", | |
"y_train = to_categorical(np.random.randint(N_CLASS, size=N_DATA), N_CLASS)\n", | |
"X_train.shape, y_train.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"simple_rnn_1 (SimpleRNN) (None, 100) 22300 \n", | |
"_________________________________________________________________\n", | |
"dense_1 (Dense) (None, 2) 202 \n", | |
"=================================================================\n", | |
"Total params: 22,502\n", | |
"Trainable params: 22,502\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
] | |
} | |
], | |
"source": [ | |
"from keras.models import Sequential\n", | |
"from keras.layers import Dense, SimpleRNN\n", | |
"from keras.optimizers import Adam\n", | |
"\n", | |
"# 時刻幅に対して1つのラベル\n", | |
"model = Sequential()\n", | |
"model.add(SimpleRNN(100, input_shape=(TIME_STEP, N_FEATURE)))\n", | |
"model.add(Dense(N_CLASS, activation='softmax'))\n", | |
"model.compile(\n", | |
" loss='categorical_crossentropy',\n", | |
" optimizer=Adam(lr=1e-3),\n", | |
" metrics=['accuracy']\n", | |
")\n", | |
"model.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"1234/1234 [==============================] - 4s 3ms/step - loss: 0.7240 - acc: 0.4968\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f4a905bcfd0>" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.fit(\n", | |
" X_train, y_train,\n", | |
" batch_size=32,\n", | |
" epochs=1,\n", | |
" verbose=1\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1234, 122)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 元データからの変換\n", | |
"X_data = np.arange(N_DATA*N_FEATURE).reshape(N_DATA, N_FEATURE)\n", | |
"X_data.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(12, 100, 122)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# オーバラップ無しで分割\n", | |
"np.array(list(zip(*[iter(X_data)]*TIME_STEP))).shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1134, 100, 122)" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 1時刻ずつずらして分割\n", | |
"def sliding_window(a, step):\n", | |
" return np.lib.stride_tricks.as_strided(a, shape=(a.shape[0]-step, step, a.shape[-1]), strides=a.strides + (a.strides[-1]*step,))\n", | |
"\n", | |
"sliding_window(X_data, TIME_STEP).shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1234, 100, 122)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from keras.preprocessing.sequence import pad_sequences\n", | |
"\n", | |
"# 長さが異なる場合\n", | |
"sequences = [\n", | |
" np.random.random(size=(i+1, N_FEATURE)) for i in range(N_DATA)\n", | |
"]\n", | |
"pad_sequences(\n", | |
" sequences,\n", | |
" maxlen=TIME_STEP, # 揃えたい長さ\n", | |
" dtype=np.float32,\n", | |
" value=0.0, # 0埋め\n", | |
" padding='pre', # maxlenより短い場合に、前をvalueで埋める(後の場合は'post')\n", | |
" truncating='pre', # maxlenより長い場合に、前を切り捨てる(後の場合は'post')\n", | |
").shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((1234, 100, 122), (1234, 100, 2))" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X_train2 = np.random.random(size=(N_DATA, TIME_STEP, N_FEATURE))\n", | |
"y_train2 = to_categorical(np.random.randint(N_CLASS, size=(N_DATA, TIME_STEP)), N_CLASS)\n", | |
"X_train2.shape, y_train2.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"simple_rnn_2 (SimpleRNN) (None, 100, 100) 22300 \n", | |
"_________________________________________________________________\n", | |
"time_distributed_1 (TimeDist (None, 100, 2) 202 \n", | |
"=================================================================\n", | |
"Total params: 22,502\n", | |
"Trainable params: 22,502\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
] | |
} | |
], | |
"source": [ | |
"from keras.layers import TimeDistributed\n", | |
"\n", | |
"# 時刻に対して1つのラベル\n", | |
"model2 = Sequential()\n", | |
"model2.add(SimpleRNN(100, return_sequences=True, input_shape=(TIME_STEP, N_FEATURE)))\n", | |
"model2.add(TimeDistributed(Dense(N_CLASS, activation='softmax')))\n", | |
"model2.compile(\n", | |
" loss='categorical_crossentropy',\n", | |
" optimizer=Adam(lr=1e-3),\n", | |
" metrics=['accuracy']\n", | |
")\n", | |
"model2.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"1234/1234 [==============================] - 5s 4ms/step - loss: 0.7272 - acc: 0.5005\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f4a992134a8>" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model2.fit(\n", | |
" X_train2, y_train2,\n", | |
" batch_size=32,\n", | |
" epochs=1,\n", | |
" verbose=1\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"* 個人的には`SimpleRNN`よりも`LSTM`がおすすめ\n", | |
"* `SimpleRNN`は長期の依存関係をうまく学習できない\n", | |
"* `return_sequences=False (default)`の場合、出力はRNNの最終時刻の出力だけになるので、最終時刻に影響される\n", | |
"* 固定長の入力(`input_shape=(TIME_STEP, N_FEATURE)`)では可変長の系列を扱えないので、RNNの時間幅と`pad_sequences`の`maxlen`をデータの最大長以上に設定\n", | |
"* 特徴方向で特徴化する場合は`TimeDistributed`\n", | |
"* 時間と特徴の両方から特徴化する場合には`Flatten`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"lstm_1 (LSTM) (None, 100) 89200 \n", | |
"_________________________________________________________________\n", | |
"dense_3 (Dense) (None, 2) 202 \n", | |
"=================================================================\n", | |
"Total params: 89,402\n", | |
"Trainable params: 89,402\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
] | |
} | |
], | |
"source": [ | |
"from keras.layers import LSTM\n", | |
"\n", | |
"# 時刻幅に対して1つのラベル(LSTMに置き換え)\n", | |
"model3 = Sequential()\n", | |
"model3.add(LSTM(100, input_shape=(TIME_STEP, N_FEATURE)))\n", | |
"model3.add(Dense(N_CLASS, activation='softmax'))\n", | |
"model3.compile(\n", | |
" loss='categorical_crossentropy',\n", | |
" optimizer=Adam(lr=1e-3),\n", | |
" metrics=['accuracy']\n", | |
")\n", | |
"model3.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"1234/1234 [==============================] - 12s 9ms/step - loss: 0.7087 - acc: 0.4878\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f4a9008c080>" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model3.fit(\n", | |
" X_train, y_train,\n", | |
" batch_size=32,\n", | |
" epochs=1,\n", | |
" verbose=1\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"_________________________________________________________________\n", | |
"Layer (type) Output Shape Param # \n", | |
"=================================================================\n", | |
"lstm_2 (LSTM) (None, 100, 122) 119560 \n", | |
"_________________________________________________________________\n", | |
"time_distributed_2 (TimeDist (None, 100, 10) 1230 \n", | |
"_________________________________________________________________\n", | |
"flatten_1 (Flatten) (None, 1000) 0 \n", | |
"_________________________________________________________________\n", | |
"dense_5 (Dense) (None, 10) 10010 \n", | |
"_________________________________________________________________\n", | |
"dense_6 (Dense) (None, 2) 22 \n", | |
"=================================================================\n", | |
"Total params: 130,822\n", | |
"Trainable params: 130,822\n", | |
"Non-trainable params: 0\n", | |
"_________________________________________________________________\n" | |
] | |
} | |
], | |
"source": [ | |
"from keras.layers import Flatten\n", | |
"\n", | |
"model4 = Sequential()\n", | |
"# 各時刻における時間と特徴を考慮した122の特徴量を出力(特徴に時間依存を反映)\n", | |
"model4.add(LSTM(N_FEATURE, return_sequences=True, input_shape=(TIME_STEP, N_FEATURE)))\n", | |
"model4.add(TimeDistributed(Dense(10))) # 特徴方向のみ圧縮\n", | |
"model4.add(Flatten())\n", | |
"model4.add(Dense(10)) # 時間と特徴で圧縮\n", | |
"model4.add(Dense(N_CLASS, activation='softmax'))\n", | |
"model4.compile(\n", | |
" loss='categorical_crossentropy',\n", | |
" optimizer=Adam(lr=1e-3),\n", | |
" metrics=['accuracy']\n", | |
")\n", | |
"model4.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/1\n", | |
"1234/1234 [==============================] - 16s 13ms/step - loss: 0.7298 - acc: 0.4846\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"<keras.callbacks.History at 0x7f4a99213438>" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model4.fit(\n", | |
" X_train, y_train,\n", | |
" batch_size=32,\n", | |
" epochs=1,\n", | |
" verbose=1\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment