Skip to content

Instantly share code, notes, and snippets.

@andrewm4894
Created January 23, 2019 15:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewm4894/58179c6e240163a45dc2acff1843f7f2 to your computer and use it in GitHub Desktop.
Save andrewm4894/58179c6e240163a45dc2acff1843f7f2 to your computer and use it in GitHub Desktop.
def data_reshape_for_model(data_in, n_timesteps, n_features, print_info=True):
    ''' Reshape a 2-D array of observations into overlapping windows for the model.

    For every starting row a window of `n_timesteps` consecutive rows is taken
    and transposed, so the result is indexed as [window, feature, timestep]:
    out[i, f, t] == data_in[i + t, f].

    Args:
        data_in: numpy array indexed as [row, feature].
        n_timesteps: number of consecutive rows in each window.
        n_features: number of columns expected in `data_in`.
        print_info: when True, print the input and output shapes.

    Returns:
        Array of shape (n_windows, n_features, n_timesteps). Windows that do
        not have exactly that (n_features, n_timesteps) shape (for example the
        incomplete windows at the end of the data) are dropped. The dtype is
        the input dtype promoted with float64.

    NOTE(review): the previous version compared the transposed window against
    (n_timesteps, n_features), so it only ever kept windows when
    n_timesteps == n_features and returned an empty array otherwise. The
    [feature, timestep] layout is kept here because that is what the square
    case produced - confirm the model's input shape agrees.
    '''
    # get original data shape
    data_in_shape = data_in.shape
    window_shape = (n_features, n_timesteps)
    # start from an empty float64 block so the output keeps the dtype promotion
    # (and a well-defined shape) even when no window qualifies
    windows = [np.zeros((0,) + window_shape)]
    # collect every complete window, then concatenate once instead of
    # re-copying the growing output array on each iteration
    for start in range(len(data_in)):
        # look ahead n_timesteps rows and transpose to [feature, timestep]
        window = data_in[start:start + n_timesteps].T
        if window.shape == window_shape:
            windows.append(window[np.newaxis])
    data_out = np.concatenate(windows)
    # get output data shape
    data_out_shape = data_out.shape
    if print_info:
        print(f'{data_in_shape} -> {data_out_shape}')
    return data_out
def train(model, data, n_epochs=10, batch_size=50, print_info=False, callbacks=None, shuffle=False, verbose=1):
    ''' Fit `model` on `data` and hand the same model object back.

    The data is passed as both input and target, and is also supplied as the
    validation set. `print_info` is accepted but not used by this function.
    '''
    # gather the fit settings in one place before calling the model
    fit_options = dict(
        epochs=n_epochs,
        batch_size=batch_size,
        validation_data=(data, data),
        verbose=verbose,
        shuffle=shuffle,
        callbacks=callbacks,
    )
    # input and target are the same array
    model.fit(data, data, **fit_options)
    return model
def predict(model, data, print_info=True):
    ''' Run `model.predict` on `data` and return its output unchanged.

    When `print_info` is True the shape of the prediction is printed first.
    '''
    predictions = model.predict(data)
    if print_info:
        print(predictions.shape)
    return predictions
def model_data_to_df_long(data, n_timesteps, n_features):
    ''' Flatten a model data array into a long format dataframe.

    `data` is indexed as [row, feature, timestep]. One output record is
    produced per (row, feature, timestep) with columns 'row', 'feature'
    ('f<i>'), 'timestep' ('t<j>') and 'value', plus a 'label' column
    ('f<i>_t<j>') that can be used to go from long format to wide.
    '''
    records = []
    for row_idx, window in enumerate(data):
        # features vary slowest, timesteps fastest, within each row
        records.extend(
            [row_idx, f'f{feat}', f't{step}', window[feat, step]]
            for feat in range(n_features)
            for step in range(n_timesteps)
        )
    # build the dataframe from the collected records
    long_df = pd.DataFrame(records, columns=['row', 'feature', 'timestep', 'value'])
    # combined feature/timestep key
    long_df['label'] = long_df['feature'] + '_' + long_df['timestep']
    return long_df
def model_df_long_to_wide(df_long, key_col='label'):
    ''' Pivot a long format model data df into a wide one.

    Args:
        df_long: dataframe with a 'row' column, a 'value' column and the
            column named by `key_col`.
        key_col: column whose values become the wide column names.

    Returns:
        Dataframe indexed by 'row' with one column per distinct `key_col`
        value, holding the matching 'value' entries.
    '''
    # select the key column actually requested; selecting a hard-coded 'label'
    # made any other key_col fail with a KeyError in the pivot
    df_wide = df_long[['row', key_col, 'value']].pivot(index='row', columns=key_col, values='value')
    return df_wide
def df_out_add_errors(df_out, n_timesteps, n_features):
    ''' Add per-feature error columns and per-timestep error summaries to df_out.

    Expects `df_out` to hold a column 'f<i>' for every feature and a column
    'f<i>_t<j>_yhat' for every feature/timestep pair. The columns are added to
    `df_out` in place and the same dataframe is returned.

    Added columns:
        'f<i>_t<j>_error': 'f<i>' shifted up by j + 1 rows minus 'f<i>_t<j>_yhat'.
        't<j>_error_avg/_med/_min/_max/_rng': row-wise mean, median, min, max
            and (max - min) of the error columns of timestep j across features.
    '''
    f_cols = [f'f{f}' for f in range(n_features)]
    t_cols = [f't{t}' for t in range(n_timesteps)]
    # per feature / timestep errors
    for f_col in f_cols:
        for t, t_col in enumerate(t_cols):
            # timestep t is compared with the value t + 1 rows further down
            lag = t + 1
            df_out[f'{f_col}_{t_col}_error'] = df_out[f_col].shift(-lag) - df_out[f'{f_col}_{t_col}_yhat']
    # summary error metrics by timestep across all features
    for t_col in t_cols:
        # name the error columns explicitly: matching on the substring
        # '<t>_error' also caught the summary columns added just before
        # (e.g. the average leaked into the median) and any stale columns
        errors = df_out[[f'{f_col}_{t_col}_error' for f_col in f_cols]]
        row_min = errors.min(axis=1)
        row_max = errors.max(axis=1)
        df_out[f'{t_col}_error_avg'] = errors.mean(axis=1)
        df_out[f'{t_col}_error_med'] = errors.median(axis=1)
        df_out[f'{t_col}_error_min'] = row_min
        df_out[f'{t_col}_error_max'] = row_max
        df_out[f'{t_col}_error_rng'] = row_max - row_min
    return df_out
def yhat_to_df_out(data_train, yhat, n_timesteps, n_features):
    ''' Combine the training data and the model output yhat into the final df_out.

    Both arrays are converted to wide dataframes (one column per
    feature/timestep label). From the training data only the columns of the
    last timestep are kept, renamed to the bare feature name; all yhat columns
    are kept with a '_yhat' suffix. Error columns are then added.
    '''
    # wide versions of both arrays
    train_wide = model_df_long_to_wide(model_data_to_df_long(data_train, n_timesteps, n_features))
    yhat_wide = model_df_long_to_wide(model_data_to_df_long(yhat, n_timesteps, n_features))
    yhat_wide.columns = [f'{col}_yhat' for col in yhat_wide.columns]
    # keep only the training columns belonging to the latest timestep
    latest_tag = f't{n_timesteps-1}'
    latest_cols = [col for col in train_wide.columns if latest_tag in col]
    actuals = train_wide[latest_cols]
    # 'f<i>_t<j>' -> 'f<i>'
    actuals.columns = [col.split('_')[0] for col in actuals.columns]
    # actual columns first, then the yhat columns
    combined = pd.concat([actuals, yhat_wide], axis=1)
    # add in error cols
    return df_out_add_errors(combined, n_timesteps, n_features)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment