AyishaR/finanacial_distress_time_based_df.py

## finanacial_distress_time_based_df.py
# Number of consecutive timesteps (the current row plus the rows before it)
# that are packed into each output row.
time_window = 5

# Rows of df grouped by company.
# NOTE(review): the windows below assume each company's rows are already in
# time order - confirm df is sorted by Time within Company.
df_company_grouped = df.groupby('Company')

# Columns whose values are carried over from earlier timesteps: every column
# whose name starts with 'x'.
# NOTE(review): the prefix test does not exclude x80 (or one-hot columns named
# after it) if those names start with 'x' - confirm that is intended.
time_affected_columns = [c for c in df.columns if c[0] == 'x']

# One single-row frame per window. They are concatenated once after the loops
# instead of growing df_final row by row, which recopies all earlier rows on
# every iteration.
window_frames = []

# For each company
for _, company_rows in df_company_grouped:
    n_rows = len(company_rows)
    # The range is empty when the company has fewer than time_window rows,
    # so such companies are discarded.
    for row_num in range(time_window, n_rows + 1):
        # Full current row (position row_num - 1) ...
        parts = [company_rows.iloc[row_num - 1, :]]
        # ... followed by the x-columns of the time_window - 1 rows before it,
        # most recent first. The lag starts at 1: a lag of 0 would append the
        # current row a second time and drop the oldest row of the window.
        for lag in range(1, time_window):
            parts.append(company_rows.iloc[row_num - 1 - lag][time_affected_columns])

        # Series to single-row DataFrame; the repeated x labels become
        # duplicate column names, one group per lag.
        window_frames.append(pd.concat(parts, axis=0).to_frame().transpose())

# Final dataframe (empty when no company has at least time_window rows)
df_final = pd.concat(window_frames) if window_frames else pd.DataFrame()

# Reset index
df_final = df_final.reset_index(drop=True)
	# Number of consecutive timesteps (the current row plus the rows before it)
	# that are packed into each output row.
	time_window = 5

	# Rows of df grouped by company.
	# NOTE(review): the windows below assume each company's rows are already in
	# time order - confirm df is sorted by Time within Company.
	df_company_grouped = df.groupby('Company')

	# Columns whose values are carried over from earlier timesteps: every column
	# whose name starts with 'x'.
	# NOTE(review): the prefix test does not exclude x80 (or one-hot columns named
	# after it) if those names start with 'x' - confirm that is intended.
	time_affected_columns = [c for c in df.columns if c[0] == 'x']

	# One single-row frame per window. They are concatenated once after the loops
	# instead of growing df_final row by row, which recopies all earlier rows on
	# every iteration.
	window_frames = []

	# For each company
	for _, company_rows in df_company_grouped:
		n_rows = len(company_rows)
		# The range is empty when the company has fewer than time_window rows,
		# so such companies are discarded.
		for row_num in range(time_window, n_rows + 1):
			# Full current row (position row_num - 1) ...
			parts = [company_rows.iloc[row_num - 1, :]]
			# ... followed by the x-columns of the time_window - 1 rows before it,
			# most recent first. The lag starts at 1: a lag of 0 would append the
			# current row a second time and drop the oldest row of the window.
			for lag in range(1, time_window):
				parts.append(company_rows.iloc[row_num - 1 - lag][time_affected_columns])

			# Series to single-row DataFrame; the repeated x labels become
			# duplicate column names, one group per lag.
			window_frames.append(pd.concat(parts, axis=0).to_frame().transpose())

	# Final dataframe (empty when no company has at least time_window rows)
	df_final = pd.concat(window_frames) if window_frames else pd.DataFrame()

	# Reset index
	df_final = df_final.reset_index(drop=True)