Created
January 23, 2019 15:37
-
-
Save andrewm4894/58179c6e240163a45dc2acff1843f7f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def data_reshape_for_model(data_in, n_timesteps, n_features, print_info=True):
    ''' Function to reshape the data into model ready format, either for training or prediction.

    Slides a window of ``n_timesteps`` consecutive rows over ``data_in``,
    transposes each window and stacks the windows that have the expected
    shape along a new leading axis.

    Args:
        data_in: array supporting ``.shape``, ``len()`` and numpy-style row
            slicing (assumes a 2-D numpy array, one row per observation --
            TODO confirm against callers).
        n_timesteps: number of consecutive rows taken for each window.
        n_features: second dimension used in the expected window shape.
        print_info: when true, print ``<input shape> -> <output shape>``.

    Returns:
        A numpy array whose first axis indexes the accepted windows and whose
        remaining shape is ``(n_timesteps, n_features)``. Windows that do not
        have the expected shape (for example the incomplete ones at the end
        of the data) are skipped, so the result may have zero rows.

    NOTE(review): each window is transposed before its shape is compared with
    ``(1, n_timesteps, n_features)``. For 2-D input with ``n_features``
    columns that comparison can only succeed when
    ``n_timesteps == n_features``; confirm the intended axis order before
    using different values.
    '''
    data_in_shape = data_in.shape
    expected_shape = (1, n_timesteps, n_features)
    # Collect windows in a list and concatenate once at the end; calling
    # np.concatenate inside the loop re-copies the whole output every time.
    windows = []
    for start in range(len(data_in)):
        # look ahead n_timesteps rows, transpose, and add a leading axis
        window = np.array([data_in[start:start + n_timesteps].transpose()])
        if window.shape == expected_shape:
            windows.append(window)
    # The empty float64 seed keeps the result's dtype promotion and the
    # zero-window result shape the same as building on an np.zeros dummy row.
    seed = np.zeros((0, n_timesteps, n_features))
    data_out = np.concatenate([seed] + windows)
    data_out_shape = data_out.shape
    if print_info:
        print(f'{data_in_shape} -> {data_out_shape}')
    return data_out
def train(model, data, n_epochs=10, batch_size=50, print_info=False, callbacks=None, shuffle=False, verbose=1):
    ''' Fit the given model on ``data`` and hand the same model object back.

    ``data`` is passed to ``model.fit`` as both the input and the target, and
    the same pair is also supplied as ``validation_data``.

    Args:
        model: object exposing a ``fit`` method accepting the keyword
            arguments used below.
        data: data passed as input, target and validation pair.
        n_epochs: forwarded to ``fit`` as ``epochs``.
        batch_size: forwarded to ``fit`` as ``batch_size``.
        print_info: accepted but not used by this function.
        callbacks: forwarded to ``fit`` as ``callbacks``.
        shuffle: forwarded to ``fit`` as ``shuffle``.
        verbose: forwarded to ``fit`` as ``verbose``.

    Returns:
        The ``model`` object that was passed in, after ``fit`` has been called.
    '''
    # gather the fit options in one place before making the call
    fit_options = {
        'epochs': n_epochs,
        'batch_size': batch_size,
        'validation_data': (data, data),
        'verbose': verbose,
        'shuffle': shuffle,
        'callbacks': callbacks,
    }
    model.fit(data, data, **fit_options)
    return model
def predict(model, data, print_info=True):
    ''' Run ``model.predict`` on ``data`` and return whatever it produces.

    Args:
        model: object exposing a ``predict`` method.
        data: input handed straight to ``model.predict``.
        print_info: when true, print the ``shape`` attribute of the result.

    Returns:
        The value returned by ``model.predict(data)``, unmodified.
    '''
    predictions = model.predict(data)
    if not print_info:
        return predictions
    # report the shape of the prediction output
    print(predictions.shape)
    return predictions
def model_data_to_df_long(data, n_timesteps, n_features):
    ''' Flatten a model data array into a long format dataframe.

    Every element ``data[r][f, t]`` for ``f < n_features`` and
    ``t < n_timesteps`` becomes one record of the result.

    Args:
        data: sequence of 2-D arrays, each indexed as ``[feature, timestep]``.
        n_timesteps: number of timestep positions read from each entry.
        n_features: number of feature positions read from each entry.

    Returns:
        A pandas dataframe with columns ``row``, ``feature`` (``f<index>``),
        ``timestep`` (``t<index>``), ``value`` and ``label``, where ``label``
        is ``<feature>_<timestep>``.
    '''
    # one record per (row, feature, timestep), in row-major order
    records = [
        [row_idx, f'f{feature_idx}', f't{step_idx}', sample[feature_idx, step_idx]]
        for row_idx, sample in enumerate(data)
        for feature_idx in range(n_features)
        for step_idx in range(n_timesteps)
    ]
    df_long = pd.DataFrame(records, columns=['row', 'feature', 'timestep', 'value'])
    # the label column is the key used later to pivot from long to wide
    df_long['label'] = df_long['feature'] + '_' + df_long['timestep']
    return df_long
def model_df_long_to_wide(df_long, key_col='label'):
    ''' Function that can translate a long format model data df into a wide version of it.

    Args:
        df_long: dataframe containing at least the columns ``row``,
            ``value`` and the column named by ``key_col``.
        key_col: name of the column whose values become the columns of the
            wide dataframe.

    Returns:
        A dataframe indexed by ``row`` with one column per distinct value of
        ``key_col``, filled from ``value``.
    '''
    # Select the column named by key_col (not a hard-coded 'label'), otherwise
    # any other key_col is dropped before the pivot and raises a KeyError.
    df_wide = df_long[['row', key_col, 'value']].pivot(index='row', columns=key_col, values='value')
    return df_wide
def df_out_add_errors(df_out, n_timesteps, n_features):
    ''' Function to take in a df_out type df and add in error columns.

    For every feature ``f<i>`` and timestep ``t<j>`` a column
    ``f<i>_t<j>_error`` is added, computed as the ``f<i>`` column shifted up
    by ``j + 1`` rows minus the ``f<i>_t<j>_yhat`` column. For every timestep
    the per-row mean, median, min, max and range of those error columns
    across all features are added as ``t<j>_error_avg``, ``_med``, ``_min``,
    ``_max`` and ``_rng``.

    Args:
        df_out: dataframe holding one ``f<i>`` column per feature and one
            ``f<i>_t<j>_yhat`` column per feature/timestep pair. It is
            modified in place.
        n_timesteps: number of timesteps ``j`` to process.
        n_features: number of features ``i`` to process.

    Returns:
        The same ``df_out`` object with the error columns added.
    '''
    f_cols = [f'f{f}' for f in range(n_features)]
    t_cols = [f't{t}' for t in range(n_timesteps)]
    # per feature / timestep errors
    for f_col in f_cols:
        for t, t_col in enumerate(t_cols):
            lag = t + 1
            df_out[f'{f_col}_{t_col}_error'] = df_out[f_col].shift(-lag) - df_out[f'{f_col}_{t_col}_yhat']
    # get summary error metrics by timestep across all features
    for t_col in t_cols:
        # Name the per-feature error columns explicitly and select them once.
        # Matching on the substring '<t>_error' after each assignment would
        # also pick up the summary columns created just above (for example
        # the average would be fed into the median).
        error_cols = [f'{f_col}_{t_col}_error' for f_col in f_cols]
        errors = df_out[error_cols]
        row_min = errors.min(axis=1)
        row_max = errors.max(axis=1)
        df_out[f'{t_col}_error_avg'] = errors.mean(axis=1)
        df_out[f'{t_col}_error_med'] = errors.median(axis=1)
        df_out[f'{t_col}_error_min'] = row_min
        df_out[f'{t_col}_error_max'] = row_max
        df_out[f'{t_col}_error_rng'] = row_max - row_min
    return df_out
def yhat_to_df_out(data_train, yhat, n_timesteps, n_features):
    ''' Combine training data and model predictions into the final df_out dataframe.

    Both arrays are converted to wide dataframes via
    ``model_data_to_df_long`` and ``model_df_long_to_wide``. The training
    columns whose label contains the last timestep (``t<n_timesteps-1>``) are
    kept and renamed to the part of the label before the first underscore,
    the prediction columns get a ``_yhat`` suffix, and error columns are
    appended by ``df_out_add_errors``.

    Args:
        data_train: model data array used as the actual values.
        yhat: model output array to compare against ``data_train``.
        n_timesteps: number of timesteps per entry.
        n_features: number of features per entry.

    Returns:
        The dataframe produced by ``df_out_add_errors``.
    '''
    def _as_wide(model_data):
        # array -> long dataframe -> wide dataframe
        long_form = model_data_to_df_long(model_data, n_timesteps, n_features)
        return model_df_long_to_wide(long_form)

    actual_wide = _as_wide(data_train)
    predicted_wide = _as_wide(yhat)
    predicted_wide.columns = [f'{col}_yhat' for col in predicted_wide.columns]

    # keep only the columns belonging to the most recent timestep
    latest_marker = f't{n_timesteps-1}'
    latest_cols = [col for col in actual_wide.columns if latest_marker in col]
    df_out = actual_wide[latest_cols]
    # strip the timestep part so the columns are named by feature only
    df_out.columns = [col.split('_')[0] for col in df_out.columns]

    # actual values side by side with every prediction column, then the errors
    df_out = pd.concat([df_out, predicted_wide], axis=1)
    return df_out_add_errors(df_out, n_timesteps, n_features)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment