Last active
February 3, 2023 14:47
-
-
Save liannewriting/656bb40f0f3a71c09f2c3a5b80e34823 to your computer and use it in GitHub Desktop.
lstm-keras-tensorflow-time-series-202003
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Goal of the model:
# Predict Global_active_power at a specified time in the future.
# E.g., we want to predict what Global_active_power will be ten minutes from now.
# We can use the values from t-1, t-2, t-3, ..., t-history_length to predict t+10.
def create_ts_files(dataset,
                    start_index,
                    end_index,
                    history_length,
                    step_size,
                    target_step,
                    num_rows_per_file,
                    data_folder):
    """Turn a series into lagged-feature rows and pickle them in chunks.

    For every timestep ``j`` in ``[start_index + history_length, end_index)``
    one row is built from the history values ``dataset[j - history_length]``
    .. ``dataset[j - 1]`` (sampled every ``step_size`` steps, counting back
    from ``j - 1``, stored oldest first) followed by the target value
    ``dataset[j + target_step]``. Rows are grouped ``num_rows_per_file`` at a
    time into DataFrames and written to ``{data_folder}/ts_file{i}.pkl``.

    Feature columns are named ``x_lag<k>``, where ``k`` is the distance in
    steps between that history value and the target; the target column is
    named ``y``.

    Args:
        dataset: Sequence that supports ``len()`` and indexing with a list of
            integers (``dataset[[i0, i1, ...]]``) -- presumably a 1-D NumPy
            array; confirm against the caller.
        start_index: First index of ``dataset`` that may be used as history.
        end_index: Exclusive upper bound for the current timestep ``j``.
            ``None`` means "as far as the data allows". Values beyond
            ``len(dataset) - target_step`` are clamped to that limit so the
            target index never runs past the end of ``dataset``.
        history_length: How many steps back the history window reaches.
        step_size: Sampling stride inside the history window.
        target_step: Offset from ``j`` to the value being predicted.
        num_rows_per_file: Maximum number of rows stored in one pickle file.
        data_folder: Output directory; created if it does not exist.

    Returns:
        The number of history (feature) columns in each row.

    Raises:
        ValueError: If ``step_size`` or ``num_rows_per_file`` is not
            positive, or ``start_index`` is negative.
    """
    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if step_size <= 0:
        raise ValueError(f'step_size must be positive, got {step_size}')
    if start_index < 0:
        raise ValueError(f'start_index must be non-negative, got {start_index}')
    if num_rows_per_file <= 0:
        raise ValueError(
            f'num_rows_per_file must be positive, got {num_rows_per_file}')

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_folder, exist_ok=True)

    # Offsets relative to the current timestep j, computed once: history
    # positions oldest-first, then the target position.
    history_offsets = sorted(range(-1, -history_length - 1, -step_size))
    row_offsets = history_offsets + [target_step]

    # A history value at offset `off` lies (target_step - off) steps before
    # the target; deriving the names from the same offsets used for indexing
    # keeps columns and data aligned.
    col_names = [f'x_lag{target_step - off}' for off in history_offsets] + ['y']

    # The first usable timestep needs a full history window behind it.
    start_index = start_index + history_length

    # The last usable timestep must still have its target inside the data.
    last_valid_end = len(dataset) - target_step
    if end_index is None:
        end_index = last_valid_end
    else:
        end_index = min(end_index, last_valid_end)

    num_rows = len(range(start_index, end_index))
    num_files = math.ceil(num_rows / num_rows_per_file)

    print(f'Creating {num_files} files.')
    for i in range(num_files):
        filename = f'{data_folder}/ts_file{i}.pkl'

        # Progress output for every tenth file only.
        if i % 10 == 0:
            print(f'{filename}')

        # Timestep range [ind0, ind1) covered by this file.
        ind0 = i * num_rows_per_file + start_index
        ind1 = min(ind0 + num_rows_per_file, end_index)

        # One row per timestep j: history values followed by the target.
        data_list = [
            dataset[[j + off for off in row_offsets]]
            for j in range(ind0, ind1)
        ]

        df_ts = pd.DataFrame(data=data_list, columns=col_names)
        df_ts.to_pickle(filename)

    return len(col_names) - 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment