@GermanCM
Last active April 16, 2024
Bayesian optimization with Keras Tuner for time series
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-int(n_test)], data[-int(n_test):]
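# Illustrative usage (not in the original gist): hold out the last 2 points
#   train_test_split([1, 2, 3, 4, 5], n_test=2) -> ([1, 2, 3], [4, 5])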
# transform a univariate series into a supervised learning format
def series_to_supervised(data, n_in, n_out=1):
    df = pd.DataFrame(data)
    cols = list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    # put it all together
    agg = pd.concat(cols, axis=1)
    # drop rows with NaN values
    agg.dropna(inplace=True)
    return agg.values
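# Illustrative example (not in the original gist): 3 lags in, 1 step out
#   series_to_supervised([1, 2, 3, 4, 5], n_in=3)
#   -> array([[1., 2., 3., 4.],
#             [2., 3., 4., 5.]])  # each row is [t-3, t-2, t-1, t]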
# root mean squared error (RMSE)
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))
# difference the dataset to make the series stationary
'''
Make the data stationary with a seasonal adjustment: subtract from each
observation the value from `interval` steps earlier (e.g. one year ago).
'''
def difference(data, interval):
    return [data[i] - data[i - interval] for i in range(interval, len(data))]
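# Illustrative example (not in the original gist): interval=12 would give a
# yearly seasonal adjustment on monthly data; with a plain first difference:
#   difference([10, 12, 15, 14], interval=1) -> [2, 3, -1]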
'''
First, we define a model-building function. It takes an argument `hp` from
which hyperparameters can be sampled, such as
hp.Int('units', min_value=32, max_value=512, step=32)
(an integer from a given range). The function returns a compiled model.
'''
from tensorflow import keras
from kerastuner.tuners import BayesianOptimization

n_input = 6

def build_model(hp):
    model = keras.Sequential()
    model.add(keras.layers.LSTM(units=hp.Int('units', min_value=32,
                                             max_value=512, step=32),
                                activation='relu',
                                input_shape=(n_input, 1)))
    # use a distinct hyperparameter name so the Dense layer is tuned
    # independently of the LSTM layer
    model.add(keras.layers.Dense(units=hp.Int('dense_units', min_value=32,
                                              max_value=512, step=32),
                                 activation='relu'))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mse', metrics=['mse'],
                  optimizer=keras.optimizers.Adam(
                      hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])))
    return model
# define the Bayesian optimization tuner
bayesian_opt_tuner = BayesianOptimization(
    build_model,
    objective='mse',
    max_trials=7,
    executions_per_trial=2,
    #directory=os.path.normpath('C:/keras_tuning'),
    project_name='timeseries_bayes_opt_POC',
    overwrite=True)
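# Note (not in the original gist): after bayesian_opt_tuner.search(...) has run
# (see model_fit below), the winning configuration can be inspected with:
#   best_hp = bayesian_opt_tuner.get_best_hyperparameters(num_trials=1)[0]
#   print(best_hp.get('units'), best_hp.get('learning_rate'))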
# walk-forward validation for univariate data
def walk_forward_validation(n_test, cfg, train_data):
    predictions = list()
    # split dataset
    train, test = train_test_split(train_data, n_test)
    # fit model (runs the tuner search and returns the best model)
    model = model_fit(train, cfg)
    # seed history with the training dataset
    history = [x for x in train]
    # step over each time step in the test set
    for i in range(len(test)):
        # make a one-step forecast from the current history
        yhat = model_predict(model, history, cfg)
        # store forecast in the list of predictions
        predictions.append(yhat)
        # add the actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error, predictions
# repeat evaluation of a config
def repeat_evaluate(data, n_test, config, n_repeats=30):
    # fit and evaluate the model n_repeats times
    predictions_matrix = []
    scores = []
    # split dataset; note walk_forward_validation splits its own test set
    # again from the tail of this train portion
    train, test = train_test_split(data, n_test)
    for _ in range(n_repeats):
        score, predictions = walk_forward_validation(n_test, config, train)
        scores.append(score)
        predictions_matrix.append(predictions)
    return scores, predictions_matrix
def model_fit(train, config):
    # unpack config
    n_input, n_nodes, n_epochs, n_batch, n_diff = config
    # prepare data
    if n_diff > 0:
        train = difference(train, n_diff)
    data = series_to_supervised(train, n_input)
    train_x, train_y = data[:, :-1], data[:, -1]
    train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], 1))
    # run the Bayesian optimization search on the training data
    bayesian_opt_tuner.search(train_x, train_y,
                              epochs=5,
                              validation_split=0.2,
                              verbose=1)
    # return the best model found by the tuner
    best_models = bayesian_opt_tuner.get_best_models(num_models=1)
    return best_models[0]
def model_predict(model, history, config):
    # unpack config (n_input is the first entry, n_diff the last)
    n_input, _, _, _, n_diff = config
    # prepare data; keep the last pre-differencing value to undo the transform
    correction = 0.0
    if n_diff > 0:
        correction = history[-n_diff]
        history = difference(history, n_diff)
    x_input = np.array(history[-n_input:]).reshape((1, n_input, 1))
    # one-step forecast
    yhat = model.predict(x_input, verbose=0)
    return correction + yhat[0]
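# summarize_scores is called below but never defined in the original gist;
# a minimal sketch (name and output format assumed, in the style of the
# walk-forward examples this gist follows): report mean and std of the scores
def summarize_scores(name, scores):
    scores_m, score_std = np.mean(scores), np.std(scores)
    print('%s: %.3f RMSE (+/- %.3f)' % (name, scores_m, score_std))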
#####################################
# load the data and run the experiment
car_sales_data = pd.read_csv('https://gist.githubusercontent.com/GermanCM/3048fec12c7c4c9def80c77be9b3b693/raw/88c8efa1f153a03446a6de4b2f1aad942a7db3fc/china_electric_vehicles_sales.csv')
# use .values so positional indexing (test[i]) works in walk-forward validation
sales_series_values = car_sales_data.sales.values
n_test = int(0.1 * len(sales_series_values))
# define config: n_input, n_nodes, n_epochs, n_batch, n_diff
config = [6, None, 100, 100, 0]
# repeated walk-forward evaluation
scores, predictions_matrix = repeat_evaluate(sales_series_values, n_test, config)
# summarize scores: n_diff = 0 (without differencing)
summarize_scores('LSTM with Bayesian hyperparameter tuning + walk-forward', scores)
@pereira-rafael

When I run the code I get an error:

'None' type object is not subscriptable

---> train_x, train_y = data[:, :-1], data[:, -1]

@GermanCM
Author

Hello Pereira, you can find the data source on this gist link; also, copy the gist code again, since some lines have been updated ;)

On the other hand, I recommend having a look at this other way of making a time series forecast (also ready to use more than one feature as input).

@jpca227

jpca227 commented Feb 23, 2023

When I run this code I get an error and it does not finish...


Search: Running Trial #7

Hyperparameter    |Value    |Best Value So Far
units             |352      |32
learning_rate     |0.01     |0.01

Epoch 1/5
2/2 [==============================] - 1s 302ms/step - loss: 957616.8125 - mse: 957616.8125 - val_loss: 4251291.5000 - val_mse: 4251291.5000
Epoch 2/5
2/2 [==============================] - 0s 47ms/step - loss: 974074.1875 - mse: 974074.1875 - val_loss: 9279387.0000 - val_mse: 9279387.0000
Epoch 3/5
2/2 [==============================] - 0s 94ms/step - loss: 721412.1875 - mse: 721412.1875 - val_loss: 6747038.0000 - val_mse: 6747038.0000
Epoch 4/5
2/2 [==============================] - 0s 89ms/step - loss: 552105.6875 - mse: 552105.6875 - val_loss: 2959386.0000 - val_mse: 2959386.0000
Epoch 5/5
2/2 [==============================] - 0s 45ms/step - loss: 588104.4375 - mse: 588104.4375 - val_loss: 7050130.5000 - val_mse: 7050130.5000
Epoch 1/5
2/2 [==============================] - 2s 276ms/step - loss: 1016230.5625 - mse: 1016230.5625 - val_loss: 4832428.0000 - val_mse: 4832428.0000
Epoch 2/5
2/2 [==============================] - 0s 38ms/step - loss: 2784067.5000 - mse: 2784067.5000 - val_loss: 8391198.0000 - val_mse: 8391198.0000
Epoch 3/5
2/2 [==============================] - 0s 42ms/step - loss: 664607.6250 - mse: 664607.6250 - val_loss: 3311314.2500 - val_mse: 3311314.2500
Epoch 4/5
2/2 [==============================] - 0s 41ms/step - loss: 552263.5000 - mse: 552263.5000 - val_loss: 6294855.0000 - val_mse: 6294855.0000
Epoch 5/5
2/2 [==============================] - 0s 92ms/step - loss: 545044.0000 - mse: 545044.0000 - val_loss: 5272206.0000 - val_mse: 5272206.0000
Trial 7 Complete [00h 00m 04s]
mse: 548574.84375

Best mse So Far: 483693.953125
Total elapsed time: 00h 00m 33s
INFO:tensorflow:Oracle triggered exit
Traceback (most recent call last):

File ~\anaconda3\lib\site-packages\pandas\core\indexes\range.py:385 in get_loc
return self._range.index(new_key)

ValueError: 0 is not in range

The above exception was the direct cause of the following exception:

Traceback (most recent call last):

File C:\Python\untitled0.py:162 in
scores, predictions_matrix = repeat_evaluate(sales_series_values, n_test, config)

File C:\Python\untitled0.py:106 in repeat_evaluate
score, predictions = walk_forward_validation(n_test, config, train)

File C:\Python\untitled0.py:87 in walk_forward_validation
history.append(test[i])

File ~\anaconda3\lib\site-packages\pandas\core\series.py:958 in __getitem__
return self._get_value(key)

File ~\anaconda3\lib\site-packages\pandas\core\series.py:1069 in _get_value
loc = self.index.get_loc(label)

File ~\anaconda3\lib\site-packages\pandas\core\indexes\range.py:387 in get_loc
raise KeyError(key) from err

KeyError: 0

@jpca227

jpca227 commented Feb 23, 2023

Hi GermanCM... I saw that you referenced Jason Brownlee's book on time series forecasting.

I have been trying to apply Bayesian Optimization to Listing 19.13 in Deep Learning for Time Series Forecasting for over 2 years!

This must be a very difficult problem, because in two years I have seen no examples of anyone attempting to apply Bayesian Optimization to time series forecasting.

Your work is the closest I have seen to a solution; however, I still cannot get it to work.

Can you help with this?
