Created
October 22, 2015 04:11
-
-
Save Nemitek/31ccafbfceac2ed88974 to your computer and use it in GitHub Desktop.
Predicting sequences of vectors (regression) in Keras using RNN - LSTM (original by danielhnyk.cz) - fixed for Keras 0.2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import random

import numpy as np
import pandas as pd

# One period of the signal ramps up 1..9 and back down 10..2 (18 samples);
# repeating it 1000 times yields a long triangle wave.
flow = (list(range(1, 10, 1)) + list(range(10, 1, -1))) * 1000

# Column "b" is column "a" delayed by 9 samples, i.e. half a period.
pdata = pd.DataFrame({"a": flow, "b": flow})
pdata.b = pdata.b.shift(9)

# Skip the first 10 rows (the shift leaves NaN in the first 9 rows of "b"),
# then scale the whole frame by one random factor drawn from [0.0, 1.0).
# NOTE: this is a single scalar shared by every sample, not per-sample noise.
data = pdata.iloc[10:] * random()
def _load_data(data, n_prev = 100): | |
""" | |
data should be pd.DataFrame() | |
""" | |
docX, docY = [], [] | |
for i in range(len(data)-n_prev): | |
docX.append(data.iloc[i:i+n_prev].as_matrix()) | |
docY.append(data.iloc[i+n_prev].as_matrix()) | |
alsX = np.array(docX) | |
alsY = np.array(docY) | |
return alsX, alsY | |
def train_test_split(df, test_size=0.1):
    """Split a frame, in row order, into training and testing samples.

    The first ``1 - test_size`` share of the rows feeds the training set and
    the remaining rows feed the test set; each part is windowed separately
    by ``_load_data`` using its default window length.

    Args:
        df: a ``pd.DataFrame`` whose rows are consecutive time steps.
        test_size: fraction of the rows reserved for testing.

    Returns:
        ``((X_train, y_train), (X_test, y_test))``.
    """
    # ``round`` returns a float on Python 2; coerce it so the split point is
    # always a valid integer slice bound.
    ntrn = int(round(len(df) * (1 - test_size)))
    train_part = _load_data(df.iloc[:ntrn])
    test_part = _load_data(df.iloc[ntrn:])
    return train_part, test_part
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM

# Layer sizes: the frame has two columns ("a" and "b"), and the network
# predicts a value for each of them.
in_neurons = 2
out_neurons = 2
hidden_neurons = 20

# LSTM -> Dense -> linear activation, trained on mean squared error.
model = Sequential()
model.add(LSTM(output_dim=hidden_neurons, input_dim=in_neurons, return_sequences=False))
model.add(Dense(output_dim=out_neurons, input_dim=hidden_neurons))
model.add(Activation("linear"))
model.compile(loss="mean_squared_error", optimizer="rmsprop")

(X_train, y_train), (X_test, y_test) = train_test_split(data)  # retrieve data
model.fit(X_train, y_train, batch_size=450, nb_epoch=10, validation_split=0.05)

predicted = model.predict(X_test)
# Root-mean-square error, computed separately for each output column.
rmse = np.sqrt(((predicted - y_test) ** 2).mean(axis=0))

# Dump the first 100 predictions and the matching targets so they can be
# compared or plotted afterwards. The targets are written with ``to_csv``;
# the earlier ``.plot("test_data.csv")`` call never wrote this file.
pd.DataFrame(predicted[:100]).to_csv("predicted.csv")
pd.DataFrame(y_test[:100]).to_csv("test_data.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Correction: the last line of the script should be pd.DataFrame(y_test[:100]).to_csv("test_data.csv") — it should call to_csv, not plot.