Created
January 22, 2021 14:05
-
-
Save AyishaR/87e74514ad7d15fb486c955f259302a9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sliding-window feature construction.
#
# For every company with at least `time_window` rows, each row from the
# `time_window`-th onwards becomes one output row made of:
#   * every column of that row, followed by
#   * the time-dependent ("x...") columns of the `time_window - 1` rows that
#     immediately precede it, most recent first.
# Lagged columns keep their original names, so `df_final` carries repeated
# column labels. Rows are used in the order they appear inside each company
# group; presumably `df` is already sorted by Time per company - TODO confirm.

# Number of timesteps combined into one output row
time_window = 5

# Rows grouped by company
df_company_grouped = df.groupby('Company')

# Columns whose values change over time, selected by their "x" name prefix.
# NOTE(review): the prefix test keeps every column whose name starts with "x";
# if the one-hot encoded x80 columns (or x80 itself) still carry that prefix
# they are included here too - confirm against the encoding step.
time_affected_columns = [
    c for c in df.columns if isinstance(c, str) and c.startswith('x')
]

# One single-row DataFrame per window. They are concatenated once after the
# loops instead of growing `df_final` inside them, which would copy the whole
# accumulated frame on every iteration.
window_rows = []

for _, company_rows in df_company_grouped:
    n_rows = len(company_rows)

    # Companies with fewer timesteps than the time window are discarded
    if n_rows < time_window:
        continue

    # `last_pos` is the position of the most recent row of each window, so the
    # first `time_window - 1` rows never act as a window end.
    for last_pos in range(time_window - 1, n_rows):
        # The most recent row contributes all of its columns
        pieces = [company_rows.iloc[last_pos, :]]

        # Offsets start at 1: offset 0 is the current row, which is already
        # present in full above. Starting at 0 would duplicate its features
        # and leave the oldest row of the window unused.
        for offset in range(1, time_window):
            lagged = company_rows.iloc[last_pos - offset][time_affected_columns]
            pieces.append(lagged)

        # Series -> single-row DataFrame
        window_rows.append(pd.concat(pieces, axis=0).to_frame().transpose())

# Final dataframe (empty when no company has enough timesteps)
df_final = pd.concat(window_rows) if window_rows else pd.DataFrame()

# Reset index
df_final = df_final.reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment