Ran Pelta (ranpelta)

View GitHub Profile
import pandas as pd
import numpy as np
import keras
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense, LSTM  # LSTM is needed for the model defined further down
n_input = 25  # how many timesteps to look back in order to forecast the next sample
n_features = X_train.shape[1]  # how many predictors/Xs/features we have to predict y
b_size = 32  # number of timeseries samples in each batch
generator = TimeseriesGenerator(scaled_X_train, scaled_y_train, length=n_input, batch_size=b_size)
print(generator[0][0].shape)
>>> (32, 25, 3)
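The shape (32, 25, 3) is (batch_size, n_input, n_features). To see how TimeseriesGenerator pairs each look-back window of X with the y that comes right after it, here is a small self-contained sketch on toy data (hypothetical values, not the dataset used above):

import numpy as np
from keras.preprocessing.sequence import TimeseriesGenerator

toy_X = np.arange(10).reshape(-1, 1)   # one feature, values 0..9
toy_y = np.arange(10) * 10             # target, values 0, 10, ..., 90
toy_gen = TimeseriesGenerator(toy_X, toy_y, length=3, batch_size=1)

X0, y0 = toy_gen[0]
print(X0)   # the 3 timesteps before index 3: values 0, 1, 2
print(y0)   # [30] -> the target at index 3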
# prepare the test set: scale X with the same Xscaler that was fitted on the train data
X_test = test.drop(y_col, axis=1).copy()
scaled_X_test = Xscaler.transform(X_test)
# the generator requires a target array, but it is not used at prediction time, so pass zeros
test_generator = TimeseriesGenerator(scaled_X_test, np.zeros(len(X_test)), length=n_input, batch_size=b_size)
print(test_generator[0][0].shape)
>>> (32, 25, 3)
y_pred_scaled = model.predict(test_generator)
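The predictions come back in the scaled 0-1 space, and there are n_input fewer of them than test rows, because the first 25 timesteps of the test set only serve as the look-back window for the first prediction. A minimal sketch of mapping them back to the original units and aligning them with the test index (assuming the Yscaler fitted further down in these snippets):

y_pred = Yscaler.inverse_transform(y_pred_scaled)   # back to the original units
results = pd.DataFrame({'y_true': test[y_col].values[n_input:],   # first n_input rows have no prediction
                        'y_pred': y_pred.reshape(-1)},
                       index=test.index[n_input:])
print(results.shape)   # (len(test) - n_input, 2)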
model.fit_generator(generator, epochs=5)
>>>
Epoch 1/5
903/903 [==============================] - 14s 15ms/step - loss: 0.0311
Epoch 2/5
903/903 [==============================] - 14s 15ms/step - loss: 0.0189
Epoch 3/5
903/903 [==============================] - 14s 16ms/step - loss: 0.0170
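Note that fit_generator is deprecated in newer versions of Keras/TensorFlow; there, model.fit accepts the generator directly, for example:

model.fit(generator, epochs=5)   # equivalent training call in recent Keras versions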
model = Sequential()
model.add(LSTM(150, activation='relu', input_shape=(n_input, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()
>>>
Model: "sequential_1"
Xscaler = MinMaxScaler(feature_range=(0, 1)) # scale so that all the X data will range from 0 to 1
Xscaler.fit(X_train)
scaled_X_train = Xscaler.transform(X_train)
print(X_train.shape)
Yscaler = MinMaxScaler(feature_range=(0, 1))
Yscaler.fit(y_train)
scaled_y_train = Yscaler.transform(y_train)
print(scaled_y_train.shape)
scaled_y_train = scaled_y_train.reshape(-1) # remove the second dimension from y so the shape changes from (n, 1) to (n,)
print(scaled_y_train.shape)
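MinMaxScaler maps each column linearly onto [0, 1] using the minimum and maximum seen during fit, i.e. x_scaled = (x - min) / (max - min); the same fitted scaler is reused on the test data (transform, not fit_transform), so test values outside the training range can fall outside [0, 1]. A toy illustration:

toy = np.array([[10.], [15.], [20.]])
toy_scaler = MinMaxScaler(feature_range=(0, 1)).fit(toy)
print(toy_scaler.transform(toy).ravel())   # [0.  0.5 1. ]
print(toy_scaler.transform([[25.]]))       # [[1.5]] -> outside the fitted range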
test_size = int(len(df) * 0.1) # the test data will be 10% (0.1) of the entire data
train = df.iloc[:-test_size,:].copy()
# the copy() is important: without it pandas may later raise SettingWithCopyWarning ("A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead")
test = df.iloc[-test_size:,:].copy()
print(train.shape, test.shape)
>>> (28916, 4) (3212, 4)
#separate X and y only for the train data (for now)
X_train = train.drop(y_col,axis=1).copy()
y_train = train[[y_col]].copy() # the double brackets keep y in DataFrame format; otherwise it would be a pandas Series
print(X_train.shape, y_train.shape)
>>> (28916, 3) (28916, 1)
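None of these gists define df or y_col, which the split and the X/y separation above rely on. A hypothetical stand-in (random data, sized to match the shapes printed above) that makes the snippets reproducible end to end could look like this:

import numpy as np
import pandas as pd

# placeholder data only -- the original dataset is not included in the gists
rng = pd.date_range('2015-01-01', periods=32128, freq='h')   # 28916 train + 3212 test rows
df = pd.DataFrame(np.random.rand(len(rng), 4),
                  index=rng,
                  columns=['feat_1', 'feat_2', 'feat_3', 'target'])
y_col = 'target'   # the column to forecast; the other three columns are the predictors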
plt.figure(figsize=(50,4))
plt.plot(train.index, train[y_col], label='Train')
plt.plot(test.index, test[y_col], label='Test')
plt.legend();