-
-
Save GermanCM/1943a0dc1eac04f848c6fe9b16947ac4 to your computer and use it in GitHub Desktop.
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split an ordered series into a leading train part and a trailing test part.

    Args:
        data: Sliceable sequence that supports ``len()`` (for example a list).
        n_test: Number of trailing observations to hold out; coerced with
            ``int()``.

    Returns:
        A ``(train, test)`` tuple of slices of ``data``. ``test`` holds the
        last ``n_test`` items (all of ``data`` when ``n_test`` exceeds its
        length) and ``train`` holds everything before them.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    n_test = int(n_test)
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %d' % n_test)
    # Slice from an explicit boundary instead of a negative offset:
    # ``data[-0:]`` is the whole series, so ``n_test == 0`` would otherwise
    # return an empty train set and put every observation in the test set.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]
# transform list into supervised learning format
def series_to_supervised(data, n_in, n_out=1):
    """Frame a series as lagged-input / forecast-output rows.

    Args:
        data: Values accepted by ``pandas.DataFrame``.
        n_in: Number of lagged copies (t-n_in ... t-1) placed first.
        n_out: Number of forward copies (t ... t+n_out-1) placed last.

    Returns:
        The ``.values`` array of the column-wise concatenation of all shifted
        copies, after dropping every row that contains a NaN.
    """
    import pandas as pd

    frame = pd.DataFrame(data)
    # lagged inputs first, oldest lag on the left (t-n, ... t-1)
    shifted = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    # then the forecast columns (t, t+1, ... t+n)
    shifted.extend(frame.shift(-step) for step in range(n_out))
    # shifting leaves NaN at the edges; only fully populated rows are kept
    return pd.concat(shifted, axis=1).dropna().values
# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the root mean squared error between two equally sized series.

    Args:
        actual: Observed values (anything ``numpy.asarray`` can convert).
        predicted: Forecast values with one entry per observation; each entry
            may be a scalar or a small array such as a one-element forecast.

    Returns:
        The RMSE as a Python ``float``.

    Raises:
        ValueError: If either input is empty or the two shapes do not match
            once the per-sample dimensions are flattened.
    """
    # Imported locally so the function does not depend on module-level names.
    import numpy as np

    observed = np.atleast_1d(np.asarray(actual, dtype=float))
    forecast = np.atleast_1d(np.asarray(predicted, dtype=float))
    if observed.size == 0 or forecast.size == 0:
        raise ValueError('measure_rmse needs at least one observation')
    # Bring both inputs to (n_samples, n_outputs) so that a flat series and a
    # column of one-element forecasts are compared element by element rather
    # than being broadcast against each other into an (n, n) matrix.
    observed = observed.reshape(len(observed), -1)
    forecast = forecast.reshape(len(forecast), -1)
    if observed.shape != forecast.shape:
        raise ValueError(
            'shape mismatch: actual %s vs predicted %s'
            % (observed.shape, forecast.shape))
    return float(np.sqrt(np.mean((observed - forecast) ** 2)))
# difference dataset TO MAKE THE SERIES STATIONARY:
def difference(data, interval):
    """Return the lag-``interval`` differences of a series.

    Makes the data stationary by performing a seasonal adjustment, that is
    subtracting the value from ``interval`` steps earlier (e.g. one year ago)
    from each observation.

    Args:
        data: Iterable of observations in time order.
        interval: Non-negative lag between the two values being subtracted.

    Returns:
        A list with ``max(len(data) - interval, 0)`` entries, where entry
        ``k`` is ``data[k + interval] - data[k]`` by position.

    Raises:
        ValueError: If ``interval`` is negative.
    """
    if interval < 0:
        raise ValueError('interval must be non-negative, got %r' % (interval,))
    # Materialise the values first so the lookups are positional: indexing a
    # pandas Series with ``data[i]`` is label-based and breaks when the index
    # does not start at 0.
    values = list(data)
    return [later - earlier
            for earlier, later in zip(values, values[interval:])]
''' | |
First, we define a model-building function. It takes an argument hp from which you can sample hyperparameters, such as hp.Int('units', min_value=32, max_value=512, step=32) (an integer from a certain range). | |
This function returns a compiled model. | |
''' | |
from tensorflow import keras | |
from kerastuner.tuners import BayesianOptimization | |
# number of time steps in each input window of the network
n_input = 6


def build_model(hp):
    """Build and compile the LSTM regressor for one tuner trial.

    Args:
        hp: Hyperparameter sampler exposing ``Int`` and ``Choice``.

    Returns:
        A compiled ``keras.Sequential`` model: an LSTM layer fed with
        ``(n_input, 1)`` windows, a ReLU dense layer and a single-unit output,
        trained with MSE loss and the Adam optimizer.
    """
    def sample_units():
        # Both hidden layers request the hyperparameter under the same name
        # 'units'.
        return hp.Int('units', min_value=32, max_value=512, step=32)

    recurrent = keras.layers.LSTM(units=sample_units(),
                                  activation='relu',
                                  input_shape=(n_input, 1))
    hidden = keras.layers.Dense(units=sample_units(), activation='relu')
    output = keras.layers.Dense(1)
    model = keras.Sequential([recurrent, hidden, output])

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(loss='mse',
                  metrics=['mse'],
                  optimizer=keras.optimizers.Adam(learning_rate))
    return model
# define model
# Trials are ranked on the validation metric rather than the training metric:
# the search call passes validation_split=0.2 and the model is compiled with
# metrics=['mse'], so 'val_mse' is logged for every epoch. Selecting on the
# training 'mse' would reward configurations that merely fit the training
# windows best.
bayesian_opt_tuner = BayesianOptimization(
    build_model,
    objective='val_mse',
    max_trials=7,
    executions_per_trial=2,
    # directory=os.path.normpath('C:/keras_tuning'),
    project_name='timeseries_bayes_opt_POC',
    overwrite=True,
)
# walk-forward validation for univariate data
def walk_forward_validation(n_test, cfg, train_data):
    """Fit once, then forecast the held-out tail one step at a time.

    Args:
        n_test: Number of trailing observations of ``train_data`` to forecast.
        cfg: Configuration forwarded unchanged to ``model_fit`` and
            ``model_predict``.
        train_data: Ordered series (list or pandas Series) to split.

    Returns:
        ``(error, predictions)``: the RMSE over the held-out observations and
        the list of one-step forecasts in time order.
    """
    # split dataset
    train, test = train_test_split(train_data, n_test)
    # fit model
    model = model_fit(train, cfg)
    # seed history with training dataset
    history = list(train)
    # Work on plain values. ``test`` may be a pandas Series slice whose index
    # labels do not start at 0, in which case ``test[i]`` is a label lookup
    # and raises ``KeyError: 0``.
    actuals = list(test)
    predictions = []
    # step over each time-step in the test set
    for actual in actuals:
        # forecast the next value from everything observed so far
        predictions.append(model_predict(model, history, cfg))
        # add actual observation to history for the next loop
        history.append(actual)
    # estimate prediction error
    error = measure_rmse(actuals, predictions)
    print(' > %.3f' % error)
    return error, predictions
# repeat evaluation of a config
def repeat_evaluate(data, n_test, config, n_repeats=30):
    """Run walk-forward validation ``n_repeats`` times for one configuration.

    Args:
        data: Full ordered series.
        n_test: Size of the trailing split, used here and again inside
            ``walk_forward_validation``.
        config: Configuration forwarded to ``walk_forward_validation``.
        n_repeats: Number of repetitions.

    Returns:
        ``(scores, predictions_matrix)``: one error and one list of forecasts
        per repetition, in run order.
    """
    # NOTE(review): only the leading part of this split is passed on, so the
    # last n_test observations of ``data`` are never evaluated. Presumably a
    # deliberate hold-out; confirm.
    leading_part, _ = train_test_split(data, n_test)
    runs = [walk_forward_validation(n_test, config, leading_part)
            for _ in range(n_repeats)]
    scores = [score for score, _ in runs]
    predictions_matrix = [forecasts for _, forecasts in runs]
    return scores, predictions_matrix
def model_fit(train, config):
    """Run the hyperparameter search on ``train`` and return the best model.

    Args:
        train: Ordered training series.
        config: Five-item sequence ``(n_input, n_nodes, n_epochs, n_batch,
            n_diff)``. Only ``n_input`` and ``n_diff`` are read here.

    Returns:
        The first model returned by
        ``bayesian_opt_tuner.get_best_models(num_models=1)``.
    """
    # unpack config; the three middle entries are not used by the search
    n_input, _n_nodes, _n_epochs, _n_batch, n_diff = config
    # optionally difference the series before windowing it
    series = difference(train, n_diff) if n_diff > 0 else train
    windows = series_to_supervised(series, n_input)
    # last column is the target, everything before it is the input window
    features = windows[:, :-1]
    targets = windows[:, -1]
    n_samples, n_steps = features.shape
    # add a trailing axis of size 1: (samples, steps) -> (samples, steps, 1)
    features = features.reshape((n_samples, n_steps, 1))
    bayesian_opt_tuner.search(features, targets,
                              epochs=5,
                              validation_split=0.2,
                              verbose=1)
    # the tuner's best model is returned as-is; no further fit is run here
    return bayesian_opt_tuner.get_best_models(num_models=1)[0]
def model_predict(model, history, config):
    """Forecast the next value from the most recent window of ``history``.

    Args:
        model: Object with a ``predict(x, verbose=0)`` method that accepts an
            array of shape ``(1, n_input, 1)``.
        history: List of observations seen so far, in time order.
        config: Sequence whose first item is the window length ``n_input`` and
            whose last item is the differencing interval ``n_diff``.

    Returns:
        ``correction + yhat[0]``, where ``yhat`` is the model output and
        ``correction`` is ``history[-n_diff]`` when differencing is enabled
        and ``0.0`` otherwise.

    Raises:
        ValueError: If fewer than ``n_input`` values are available once the
            optional differencing has been applied.
    """
    import numpy as np

    # Read the window length from the config, as model_fit does, so fitting
    # and forecasting always use the same window size.
    n_input, n_diff = config[0], config[-1]
    # prepare data
    correction = 0.0
    if n_diff > 0:
        # value to add back onto the differenced forecast
        correction = history[-n_diff]
        history = difference(history, n_diff)
    window = history[-n_input:]
    if len(window) != n_input:
        raise ValueError(
            'need %d observations to forecast, got %d'
            % (n_input, len(window)))
    x_input = np.array(window).reshape((1, n_input, 1))
    # forecast
    yhat = model.predict(x_input, verbose=0)
    return correction + yhat[0]
#####################################
import pandas as pd


def summarize_scores(name, scores):
    """Print the mean and standard deviation of a list of RMSE scores.

    Args:
        name: Label printed in front of the summary.
        scores: Sequence of numeric error scores.
    """
    # Defined here because the script calls it and no definition or import
    # of this name exists in the file.
    import numpy as np

    print('%s: %.3f RMSE (+/- %.3f)' % (name, np.mean(scores), np.std(scores)))


car_sales_data = pd.read_csv('https://gist.githubusercontent.com/GermanCM/3048fec12c7c4c9def80c77be9b3b693/raw/88c8efa1f153a03446a6de4b2f1aad942a7db3fc/china_electric_vehicles_sales.csv')
sales_series_values = car_sales_data.sales
# hold out 10% of the observations
n_test = int(0.1 * len(sales_series_values))
# define config: n_input, n_nodes, n_epochs, n_batch, n_diff
config = [6, None, 100, 100, 0]
# repeated walk-forward evaluation of the single config above
scores, predictions_matrix = repeat_evaluate(sales_series_values, n_test, config)
# summarize scores: n_diff = 0, i.e. without differencing
summarize_scores('LSTM', scores)
# NOTE(review): the same scores are summarized a second time under a longer
# label; presumably only one of the two calls is needed.
summarize_scores('LSTM with bayesian hiperametrization + walk-forward', scores)
Hello Pereira, you can find the data source at this gist link; also, please copy the gist code again, as some lines have been updated ;)
On the other hand, I recommend that you have a look at this other way of making a time series forecast (which can also use more than one feature as input).
When I run this code I get an error and it does not finish...
Search: Running Trial #7
Hyperparameter |Value |Best Value So Far
units |352 |32
learning_rate |0.01 |0.01
Epoch 1/5
2/2 [==============================] - 1s 302ms/step - loss: 957616.8125 - mse: 957616.8125 - val_loss: 4251291.5000 - val_mse: 4251291.5000
Epoch 2/5
2/2 [==============================] - 0s 47ms/step - loss: 974074.1875 - mse: 974074.1875 - val_loss: 9279387.0000 - val_mse: 9279387.0000
Epoch 3/5
2/2 [==============================] - 0s 94ms/step - loss: 721412.1875 - mse: 721412.1875 - val_loss: 6747038.0000 - val_mse: 6747038.0000
Epoch 4/5
2/2 [==============================] - 0s 89ms/step - loss: 552105.6875 - mse: 552105.6875 - val_loss: 2959386.0000 - val_mse: 2959386.0000
Epoch 5/5
2/2 [==============================] - 0s 45ms/step - loss: 588104.4375 - mse: 588104.4375 - val_loss: 7050130.5000 - val_mse: 7050130.5000
Epoch 1/5
2/2 [==============================] - 2s 276ms/step - loss: 1016230.5625 - mse: 1016230.5625 - val_loss: 4832428.0000 - val_mse: 4832428.0000
Epoch 2/5
2/2 [==============================] - 0s 38ms/step - loss: 2784067.5000 - mse: 2784067.5000 - val_loss: 8391198.0000 - val_mse: 8391198.0000
Epoch 3/5
2/2 [==============================] - 0s 42ms/step - loss: 664607.6250 - mse: 664607.6250 - val_loss: 3311314.2500 - val_mse: 3311314.2500
Epoch 4/5
2/2 [==============================] - 0s 41ms/step - loss: 552263.5000 - mse: 552263.5000 - val_loss: 6294855.0000 - val_mse: 6294855.0000
Epoch 5/5
2/2 [==============================] - 0s 92ms/step - loss: 545044.0000 - mse: 545044.0000 - val_loss: 5272206.0000 - val_mse: 5272206.0000
Trial 7 Complete [00h 00m 04s]
mse: 548574.84375
Best mse So Far: 483693.953125
Total elapsed time: 00h 00m 33s
INFO:tensorflow:Oracle triggered exit
Traceback (most recent call last):
File ~\anaconda3\lib\site-packages\pandas\core\indexes\range.py:385 in get_loc
return self._range.index(new_key)
ValueError: 0 is not in range
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File C:\Python\untitled0.py:162 in
scores, predictions_matrix = repeat_evaluate(sales_series_values, n_test, config)
File C:\Python\untitled0.py:106 in repeat_evaluate
score, predictions = walk_forward_validation(n_test, config, train)
File C:\Python\untitled0.py:87 in walk_forward_validation
history.append(test[i])
File ~\anaconda3\lib\site-packages\pandas\core\series.py:958 in getitem
return self._get_value(key)
File ~\anaconda3\lib\site-packages\pandas\core\series.py:1069 in _get_value
loc = self.index.get_loc(label)
File ~\anaconda3\lib\site-packages\pandas\core\indexes\range.py:387 in get_loc
raise KeyError(key) from err
KeyError: 0
Hi GermanCM...I saw that you referenced Jason Brownlee's book on time series forecasting.
I have been trying to apply Bayesian Optimization to Listing 19.13 in Deep Learning For Time-Series Forecasting for over 2 years!
This must be a very difficult problem because I have seen no examples in two years of anyone attempting to apply Bayesian Optimization to time series forecasting.
Your work is the closest I have seen to a solution, however, I still cannot get it to work.
Can you help with this?
When I run the code I get an error:
'None' type object is not subscriptable.
---> train_x, train_y = data[:, :-1] data[:,-1]