-
-
Save fanannan/0314bb1c2a88b63d6aea to your computer and use it in GitHub Desktop.
Tada's usage (see discussion)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" From: http://danielhnyk.cz/predicting-sequences-vectors-keras-using-rnn-lstm/ | |
and https://gist.github.com/hnykda/f1eca6cb0061cde701c2#file-keras-py | |
See comments on the blog at danielhnyk.cz. """ | |
from keras.models import Sequential | |
from keras.layers.core import TimeDistributedDense, Dense, Activation, Dropout | |
from keras.layers.recurrent import GRU, LSTM | |
import numpy as np | |
import datetime | |
import matplotlib.pyplot as plt | |
try: | |
import pandas_datareader.data as web | |
except: | |
import pandas.io.data as web | |
def _load_data(data, steps=40):
    """Cut a time series into consecutive (X, Y) blocks with Y one step ahead.

    Block ``i`` pairs ``X = data[i*steps:(i+1)*steps]`` with
    ``Y = data[i*steps+1:(i+1)*steps+1]``, i.e. Y(t) = X(t+1).

    Args:
        data: 2-D array indexed as ``data[row, column]``; rows are time steps.
        steps: Length of each block, i.e. the longest memory to be captured.

    Returns:
        Tuple ``(X, Y)`` of arrays built by stacking
        ``data.shape[0] // steps - 1`` blocks each.
    """
    # Floor division keeps the block count an int; with "/" it becomes a
    # float on Python 3 and range() raises TypeError.
    # One block fewer than would fit is used, so the +1 shift applied to Y
    # never runs past the end of data.
    n_blocks = data.shape[0] // steps - 1
    doc_x, doc_y = [], []
    for i in range(n_blocks):
        start = i * steps
        stop = start + steps
        doc_x.append(data[start:stop, :])
        doc_y.append(data[start + 1:stop + 1, :])
    return np.array(doc_x), np.array(doc_y)
def train_test_split(data, test_size=0.15):
    """Split the blocked series into randomly assigned train and test parts.

    The series is first cut into (X, Y) blocks by ``_load_data`` (with its
    default block length); the blocks are then shuffled with
    ``np.random.permutation`` and divided between the two parts.

    Args:
        data: 2-D array of observations, passed straight to ``_load_data``.
        test_size: Fraction of the blocks reserved for testing.

    Returns:
        ``(X_train, Y_train), (X_test, Y_test)``.
    """
    X, Y = _load_data(data)
    n_blocks = X.shape[0]
    # round() yields a float on Python 2 (and for some numpy scalars); a
    # float is not a valid slice bound, so force an int.
    ntrn = int(round(n_blocks * (1 - test_size)))
    perms = np.random.permutation(n_blocks)
    train_idx, test_idx = perms[:ntrn], perms[ntrn:]
    X_train, Y_train = X.take(train_idx, axis=0), Y.take(train_idx, axis=0)
    X_test, Y_test = X.take(test_idx, axis=0), Y.take(test_idx, axis=0)
    return (X_train, Y_train), (X_test, Y_test)
if __name__ == '__main__':
    # Script entry point: download VIX quotes, train a recurrent model to
    # predict the series shifted one step ahead, and show a scatter plot of
    # predicted vs. actual values for the held-out blocks.
    np.random.seed(0)  # For reproducibility

    # The following code replaces the original data fetching.
    ticker = '^VIX'
    start = datetime.datetime(2010, 1, 1)
    end = datetime.datetime(2015, 12, 31)
    stock_data = web.DataReader(ticker, 'yahoo', start, end)
    close_prices = stock_data[['Adj Close']]
    percent_changes = close_prices.pct_change().dropna()  # note they are not normalized

    # Experiment switches (formerly anonymous "if False:" branches).
    use_raw_prices = False  # True: train on prices instead of percent changes
    use_lstm = False        # True: LSTM + Dense instead of GRU + TimeDistributedDense

    # model building
    # .values is used instead of .as_matrix(), which newer pandas releases
    # no longer provide; both return the underlying ndarray.
    if use_raw_prices:
        data = close_prices.values
    else:
        data = percent_changes.values
    (X_train, y_train), (X_test, y_test) = train_test_split(data)  # retrieve data
    print("Data loaded.")

    in_out_neurons = data.shape[1]
    hidden_neurons = 200

    model = Sequential()
    if use_lstm:
        # NOTE(review): this branch returns only the last step of each
        # sequence, while the targets built above are whole sequences --
        # confirm shapes before enabling it.
        model.add(LSTM(hidden_neurons, input_dim=in_out_neurons, return_sequences=False))
        model.add(Dense(in_out_neurons, input_dim=hidden_neurons))
    else:
        model.add(GRU(hidden_neurons, input_dim=in_out_neurons, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(TimeDistributedDense(in_out_neurons))
    model.add(Activation("linear"))
    model.compile(loss="mean_squared_error", optimizer="rmsprop")
    print("Model compiled.")

    # and now train the model.
    num_epoches = 200
    model.fit(X_train, y_train, batch_size=30, nb_epoch=num_epoches, validation_split=0.1)
    predicted = model.predict(X_test)
    # RMSE over the test blocks, averaged over the remaining axes; it is
    # shown in the plot title below.
    rmse = np.sqrt(((predicted - y_test) ** 2).mean(axis=0)).mean()

    # outputs
    # To dump the results, import pandas as pd and uncomment:
    # pd.DataFrame(predicted).to_csv("./predicted.csv")
    # pd.DataFrame(y_test).to_csv("./test_data.csv")
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # First step / first feature of every test block. Array indexing is used
    # instead of map(), which is a lazy iterator on Python 3 and cannot be
    # plotted directly.
    xs = predicted[:, 0, 0]
    ys = y_test[:, 0, 0]
    ax.scatter(xs, ys, marker='o', color='b')
    ax.set_title('Scatter Graph: RMSE=%.2f' % rmse, size=16)
    ax.set_xlabel('Prediction', size=14)
    ax.set_ylabel('Actual', size=14)
    ax.grid(True)
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment