Skip to content

Instantly share code, notes, and snippets.

@MauroCE
Last active January 22, 2018 21:18
Show Gist options
  • Save MauroCE/c88fb18849bc1c38f470ce535457e17b to your computer and use it in GitHub Desktop.
Save MauroCE/c88fb18849bc1c38f470ce535457e17b to your computer and use it in GitHub Desktop.
# Import relevant modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.layers import Dense, LSTM
from keras.models import Sequential
# Experiment settings: training hyper-parameters and the dimensions of the toy data set
batch_size, epochs = 100, 50
timesteps, features = 5, 2
samples = 3000
# Build a synthetic two-feature series on an evenly spaced grid over [0, 100].
# Column 0 is a messy mix of sines and cosines, column 1 is a plain sine wave;
# uniform noise in [-0.2, 0.2] is added to both. The seed makes the noise reproducible.
np.random.seed(1)
grid = np.linspace(0, 100, samples).reshape(-1, 1)
array2 = np.sin(grid)
array1 = np.sin(grid)**2 + 2 * np.sin(grid / 4) - 4 * np.cos(grid / 2)**3 + np.cos(grid)
noise = np.random.uniform(-0.2, 0.2, size=(samples, features))
array = np.hstack((array1, array2)) + noise
# array is now a (samples x features) = 3000 x 2 numpy array.
# Define a function that takes an array of observations (one column per variable) and returns a
# pd.DataFrame with one column per variable per time step: lagged inputs first, then the targets
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised-learning table.

    Each row of the result holds ``n_in`` lagged observations followed by
    ``n_out`` observations starting at time ``t``. For example, if ``data``
    has 2 columns and ``n_in=3, n_out=2`` the columns are::

        var1(t-3), var2(t-3), var1(t-2), var2(t-2), var1(t-1), var2(t-1),
        var1(t), var2(t), var1(t+1), var2(t+1)

    Parameters
    ----------
    data : anything accepted by ``pd.DataFrame``
        Observations, one row per time step and one column per variable.
        A flat list or 1-D array is treated as a single variable.
    n_in : int
        Number of lagged steps (t-n_in, ..., t-1) to include.
    n_out : int
        Number of steps from ``t`` onwards (t, ..., t+n_out-1) to include.
    dropnan : bool
        If True, drop the rows that contain NaN because of the shifting.

    Returns
    -------
    pd.DataFrame
        The framed table with ``(n_in + n_out) * n_vars`` columns.
    """
    df = pd.DataFrame(data)
    # Count the variables on the DataFrame itself so that nested lists and
    # 1-D inputs get the right number of column names.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence (t-n_in, ..., t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n_out-1)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Shifting leaves NaN in the first n_in and last n_out-1 rows
    if dropnan:
        agg = agg.dropna()
    return agg
# Use the function to create the data. Want to use the previous `timesteps` observations to
# predict the next 2, i.e. use t-5, .., t-1 (for both variables) to predict t, t+1 (for both variables)
data = series_to_supervised(array, n_in=timesteps, n_out=2)
# Separate the "X" data from the "Y" data. The input column names are derived from
# `timesteps` and `features` so they always match what series_to_supervised produced
# (oldest lag first, variables grouped within each lag).
input_cols = ['var%d(t-%d)' % (j + 1, lag)
              for lag in range(timesteps, 0, -1)
              for j in range(features)]
target_cols = ['var1(t)', 'var2(t)', 'var1(t+1)', 'var2(t+1)']
X = data[input_cols]
Y = data[target_cols]
# Separate into training and testing sets. shuffle=False keeps the chronological order,
# so the test set is the last third of the series.
X_train, X_test, Y_train, Y_test = train_test_split(X.values, Y.values, test_size=0.33, shuffle=False)
# Need to reshape into (samples, timesteps, features). Each row is laid out as
# [var1(t-5), var2(t-5), var1(t-4), ...], so a plain reshape groups the features of
# each time step together.
X_train = X_train.reshape(X_train.shape[0], timesteps, features)
X_test = X_test.reshape(X_test.shape[0], timesteps, features)
# Build the Keras model: two stacked LSTM layers followed by a linear read-out.
# The first LSTM returns its full output sequence so the second LSTM can consume it.
model = Sequential([
    LSTM(30, input_shape=(timesteps, features), return_sequences=True),
    LSTM(20),
    Dense(4, activation='linear'),  # 4 outputs: t and t+1 for both features
])
# Mean squared error loss, RMSprop optimizer
model.compile(optimizer='rmsprop', loss='mse')
# Train, keeping the per-epoch history
history = model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)
# Predict on the held-out part of the series
pred = model.predict(X_test, batch_size=batch_size)
# To check if something works, we can plot predictions against the true values.
# Each output column gets its own figure; otherwise all eight curves would be drawn
# on the same axes and only the last title would be visible.
plot_titles = [
    'First 200 predictions for 1 step ahead (t) for first feature/column',
    'First 200 predictions for 1 step ahead (t) for second feature/column',
    'First 200 predictions for 2 step ahead (t+1) for first feature/column',
    'First 200 predictions for 2 step ahead (t+1) for second feature/column',
]
for column, title in enumerate(plot_titles):
    plt.figure()
    plt.plot(pred[:200, column], label='pred')
    plt.plot(Y_test[:200, column], label='true')
    plt.legend()
    plt.title(title)
# Display all figures when run as a script
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment