Skip to content

Instantly share code, notes, and snippets.

@liannewriting
Last active February 3, 2023 14:47
Show Gist options
  • Save liannewriting/656bb40f0f3a71c09f2c3a5b80e34823 to your computer and use it in GitHub Desktop.
Save liannewriting/656bb40f0f3a71c09f2c3a5b80e34823 to your computer and use it in GitHub Desktop.
lstm-keras-tensorflow-time-series-202003
# Goal of the model:
# Predict Global_active_power at a specified time in the future.
# E.g., we want to predict what Global_active_power will be ten minutes from now.
# We can use all the values from t-1, t-2, t-3, .... t-history_length to predict t+10
def create_ts_files(dataset,
                    start_index,
                    end_index,
                    history_length,
                    step_size,
                    target_step,
                    num_rows_per_file,
                    data_folder):
    """Build lagged-history/target rows from a series and pickle them in chunks.

    For every timestep ``j`` in ``[start_index + history_length, end_index)``
    one row is produced: the values at ``j - history_length .. j - 1`` (taking
    every ``step_size``-th value, counted backwards from ``j - 1``, stored
    oldest first) followed by the target value at ``j + target_step``.
    Rows are grouped into DataFrames of at most ``num_rows_per_file`` rows and
    written to ``{data_folder}/ts_file{i}.pkl``.

    Args:
        dataset: Sequence indexed with a list of integer positions.
            NOTE(review): the indexing ``dataset[[...]]`` suggests a 1-D
            NumPy array is expected -- confirm against the caller.
        start_index: First position of ``dataset`` that may be used as
            history. Must be >= 0.
        end_index: Exclusive upper bound for ``j``. ``None`` means
            ``len(dataset) - target_step``, i.e. the last ``j`` whose target
            is still inside ``dataset``.
        history_length: How many timesteps before ``j`` the history window
            spans.
        step_size: Stride used when sampling the history window. Must be > 0.
        target_step: Distance from ``j`` to the target value.
        num_rows_per_file: Maximum number of rows per pickle file. Must be > 0.
        data_folder: Output directory; created if it does not exist.

    Returns:
        The number of lag (feature) columns in each row, i.e. the column
        count without the trailing ``'y'`` target column.

    Raises:
        AssertionError: If ``step_size``, ``start_index`` or
            ``num_rows_per_file`` is out of range.
    """
    assert step_size > 0
    assert start_index >= 0
    # A non-positive chunk size would otherwise end in a ZeroDivisionError or
    # a negative file count that silently writes nothing.
    assert num_rows_per_file > 0

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(data_folder, exist_ok=True)

    # Offsets of the history samples relative to the current timestep j,
    # oldest first. They do not depend on j, so compute them once instead of
    # re-sorting a range for every row.
    lag_offsets = sorted(range(-1, -history_length - 1, -step_size))
    window_offsets = lag_offsets + [target_step]

    # Columns are named by their distance to the target (j + target_step).
    # Deriving them from the same offsets keeps names and data aligned.
    col_names = [f'x_lag{target_step - off}' for off in lag_offsets] + ['y']

    # The first usable j needs a full history window behind it.
    start_index = start_index + history_length
    if end_index is None:
        end_index = len(dataset) - target_step

    num_rows = len(range(start_index, end_index))
    num_files = math.ceil(num_rows / num_rows_per_file)

    print(f'Creating {num_files} files.')
    for i in range(num_files):
        filename = f'{data_folder}/ts_file{i}.pkl'
        if i % 10 == 0:
            print(f'{filename}')

        # Range of timesteps stored in this file; the last file may be short.
        ind0 = i * num_rows_per_file + start_index
        ind1 = min(ind0 + num_rows_per_file, end_index)

        # j is the current timestep: history comes from j-history_length..j-1,
        # the target from j + target_step.
        data_list = [
            dataset[[j + off for off in window_offsets]]
            for j in range(ind0, ind1)
        ]

        df_ts = pd.DataFrame(data=data_list, columns=col_names)
        df_ts.to_pickle(filename)

    return len(lag_offsets)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment