MauroCE/lstm_keras

## lstm_keras
# Import relevant modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.layers import Dense, LSTM
from keras.models import Sequential

# Experiment configuration
batch_size = 100   # mini-batch size handed to Keras fit/predict
epochs = 50        # number of passes over the training data
timesteps = 5      # how many past observations feed each prediction
features = 2       # number of series (columns) in the data
samples = 3000     # number of points generated per series

# Build a synthetic two-column data set on a shared time grid:
#   column 0 - a "messy" mixture of sines and cosines,
#   column 1 - a plain sine wave,
# then add uniform noise in [-0.2, 0.2) to both columns.
np.random.seed(1)  # fixed seed so the noise is reproducible
_grid = np.linspace(0, 100, samples).reshape(-1, 1)
_squared_sine = np.sin(_grid) ** 2
_slow_sine = 2 * np.sin(_grid / 4)
_cubed_cosine = 4 * np.cos(_grid / 2) ** 3
array1 = _squared_sine + _slow_sine - _cubed_cosine + np.cos(_grid)
array2 = np.sin(_grid)
_noise = np.random.uniform(-0.2, 0.2, size=(samples, features))
array = np.hstack((array1, array2)) + _noise
# array now has shape (samples, features), i.e. 3000 x 2.


# Define a function that takes in an array and returns a pd.DataFrame that has various columns for
# each variable, showing different timesteps
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a table of lagged and forecast columns.

    Each variable (column) of ``data`` is repeated once per lag
    ``t-n_in, ..., t-1`` and once per forecast step ``t, ..., t+n_out-1``.
    For example, with 2 columns, ``n_in=3`` and ``n_out=2`` the result has
    the columns
    ["var1(t-3)", "var2(t-3)", "var1(t-2)", "var2(t-2)", "var1(t-1)",
    "var2(t-1)", "var1(t)", "var2(t)", "var1(t+1)", "var2(t+1)"].

    Parameters
    ----------
    data : anything accepted by ``pd.DataFrame``
        A flat list or 1-D array is treated as a single variable; a list of
        rows or a 2-D array has one variable per column.
    n_in : int
        Number of lagged observations to include.
    n_out : int
        Number of forecast steps to include, starting at ``t``.
    dropnan : bool
        If true, drop the rows that contain NaN; shifting leaves NaN in the
        first ``n_in`` and last ``n_out - 1`` rows.

    Returns
    -------
    pd.DataFrame
        The lagged columns followed by the forecast columns.
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself so that lists of rows
    # and 1-D arrays are handled, not only 2-D arrays and flat lists.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence (t-n, ..., t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop the rows made incomplete by shifting
    if dropnan:
        agg = agg.dropna()
    return agg


# Use the function to create the data. Want to use previous 5 observations to predict next 2
# I.e. use t-5, .., t-1 (for both variables) to predict t, t+1 (for both variables)
data = series_to_supervised(array, n_in=timesteps, n_out=2)

# Separate the "X" data from the "Y" data. series_to_supervised puts the
# lagged columns first (timesteps * features of them) and the forecast columns
# after, so split by position instead of hard-coding the "t-5" ... "t-1" names.
n_inputs = timesteps * features
X = data.iloc[:, :n_inputs]
Y = data.iloc[:, n_inputs:]

# Separate into training and testing sets. shuffle=False keeps the rows in
# order, so the test set is the final third of the series.
X_train, X_test, Y_train, Y_test = train_test_split(X.values, Y.values, test_size=0.33, shuffle=False)

# Need to reshape into (samples, timesteps, features)
X_train = X_train.reshape(X_train.shape[0], timesteps, features)
X_test = X_test.reshape(X_test.shape[0], timesteps, features)

# Make the Keras Model
model = Sequential()
# return_sequences=True so the second LSTM layer receives the whole sequence
model.add(LSTM(30, input_shape=(timesteps, features), return_sequences=True))
model.add(LSTM(20))
# One output per target column: t and t+1 for both features
model.add(Dense(Y_train.shape[1], activation='linear'))

# Compile model
model.compile(loss='mse', optimizer='rmsprop')

# Fit and store the history
history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1)

# Make predictions
pred = model.predict(X_test, batch_size=batch_size)

# To check if something works, we can plot. Each target column gets its own
# figure; without plt.figure() all curves would pile onto the same axes and
# only the last title would survive.
plot_titles = [
    'First 200 predictions for 1 step ahead (t) for first feature/column',
    'First 200 predictions for 1 step ahead (t) for second feature/column',
    'First 200 predictions for 2 step ahead (t+1) for first feature/column',
    'First 200 predictions for 2 step ahead (t+1) for second feature/column',
]
for column, title in enumerate(plot_titles):
    plt.figure()
    plt.plot(pred[:200, column], label='pred')
    plt.plot(Y_test[:200, column], label='true')
    plt.legend()
    plt.title(title)

# Display the figures when run as a plain script
plt.show()
	# Import relevant modules
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.model_selection import train_test_split
	from keras.layers import Dense, LSTM
	from keras.models import Sequential

	# Decide some settings
	batch_size = 100   # mini-batch size handed to Keras fit/predict
	epochs = 50        # number of passes over the training data
	timesteps = 5      # how many past observations feed each prediction
	features = 2       # number of series (columns) in the data
	samples = 3000     # number of points generated per series

	# Randomly create some data. In this case, I am creating 2 features. One is simple sine wave
	# with some noise, the other is a combination of various sine and cosines so that it is more messy
	np.random.seed(1)  # fixed seed so the noise is reproducible
	array1 = np.linspace(0, 100, samples).reshape(-1, 1)
	# The squared sine and the "2 *" factor are written out explicitly here;
	# the pasted line had lost its "*" characters and was not valid Python.
	array1 = np.sin(array1)**2 + 2 * np.sin(array1 / 4) - 4 * np.cos(array1 / 2)**3 + np.cos(array1)
	array2 = np.sin(np.linspace(0, 100, samples).reshape(-1, 1))
	# Stack the two series side by side and add uniform noise in [-0.2, 0.2)
	array = np.hstack((array1, array2)) + np.random.uniform(-0.2, 0.2, size=(samples, features))
	# Basically array is a 3000 x 2 numpy array.


	# Define a function that takes in an array and returns a pd.DataFrame that has various columns for
	# each variable, showing different timesteps
	def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	    """Frame a time series as a table of lagged and forecast columns.

	    Each variable (column) of ``data`` is repeated once per lag
	    ``t-n_in, ..., t-1`` and once per forecast step ``t, ..., t+n_out-1``.
	    For example, with 2 columns, ``n_in=3`` and ``n_out=2`` the result has
	    the columns
	    ["var1(t-3)", "var2(t-3)", "var1(t-2)", "var2(t-2)", "var1(t-1)",
	    "var2(t-1)", "var1(t)", "var2(t)", "var1(t+1)", "var2(t+1)"].

	    Parameters
	    ----------
	    data : anything accepted by ``pd.DataFrame``
	        A flat list or 1-D array is treated as a single variable; a list
	        of rows or a 2-D array has one variable per column.
	    n_in : int
	        Number of lagged observations to include.
	    n_out : int
	        Number of forecast steps to include, starting at ``t``.
	    dropnan : bool
	        If true, drop the rows that contain NaN; shifting leaves NaN in
	        the first ``n_in`` and last ``n_out - 1`` rows.

	    Returns
	    -------
	    pd.DataFrame
	        The lagged columns followed by the forecast columns.
	    """
	    df = pd.DataFrame(data)
	    # Take the variable count from the frame itself so that lists of rows
	    # and 1-D arrays are handled, not only 2-D arrays and flat lists.
	    n_vars = df.shape[1]
	    cols, names = [], []
	    # Input sequence (t-n, ..., t-1)
	    for lag in range(n_in, 0, -1):
	        cols.append(df.shift(lag))
	        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
	    # Forecast sequence (t, t+1, ..., t+n)
	    for step in range(n_out):
	        cols.append(df.shift(-step))
	        if step == 0:
	            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
	        else:
	            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
	    # Put it all together
	    agg = pd.concat(cols, axis=1)
	    agg.columns = names
	    # Drop the rows made incomplete by shifting
	    if dropnan:
	        agg = agg.dropna()
	    return agg


	# Use the function to create the data. Want to use previous 5 observations to predict next 2
	# I.e. use t-5, .., t-1 (for both variables) to predict t, t+1 (for both variables)
	data = series_to_supervised(array, n_in=timesteps, n_out=2)

	# Separate the "X" data from the "Y" data. series_to_supervised puts the
	# lagged columns first (timesteps * features of them) and the forecast columns
	# after, so split by position instead of hard-coding the "t-5" ... "t-1" names.
	n_inputs = timesteps * features
	X = data.iloc[:, :n_inputs]
	Y = data.iloc[:, n_inputs:]

	# Separate into training and testing sets. shuffle=False keeps the rows in
	# order, so the test set is the final third of the series.
	X_train, X_test, Y_train, Y_test = train_test_split(X.values, Y.values, test_size=0.33, shuffle=False)

	# Need to reshape into (samples, timesteps, features)
	X_train = X_train.reshape(X_train.shape[0], timesteps, features)
	X_test = X_test.reshape(X_test.shape[0], timesteps, features)

	# Make the Keras Model
	model = Sequential()
	# return_sequences=True so the second LSTM layer receives the whole sequence
	model.add(LSTM(30, input_shape=(timesteps, features), return_sequences=True))
	model.add(LSTM(20))
	# One output per target column: t and t+1 for both features
	model.add(Dense(Y_train.shape[1], activation='linear'))

	# Compile model
	model.compile(loss='mse', optimizer='rmsprop')

	# Fit and store the history
	history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1)

	# Make predictions
	pred = model.predict(X_test, batch_size=batch_size)

	# To check if something works, we can plot. Each target column gets its own
	# figure; without plt.figure() all curves would pile onto the same axes and
	# only the last title would survive.
	plot_titles = [
	    'First 200 predictions for 1 step ahead (t) for first feature/column',
	    'First 200 predictions for 1 step ahead (t) for second feature/column',
	    'First 200 predictions for 2 step ahead (t+1) for first feature/column',
	    'First 200 predictions for 2 step ahead (t+1) for second feature/column',
	]
	for column, title in enumerate(plot_titles):
	    plt.figure()
	    plt.plot(pred[:200, column], label='pred')
	    plt.plot(Y_test[:200, column], label='true')
	    plt.legend()
	    plt.title(title)

	# Display the figures when run as a plain script
	plt.show()