Last active
June 6, 2022 20:38
-
-
Save hassanj47/ab597fd8af0c1325821290c462d1ae1d to your computer and use it in GitHub Desktop.
Preprocessing for a time-series survival model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preprocess the wide sales table into a long table with one row per
# (dept_id, item_id, d), where 'sales' is summed over every row that
# shares those keys and 'd' is an integer.
# Expects `sales` (DataFrame), `DEPTS` and `pd` to already be in scope.

# Keep only the departments listed in the config.
sales = sales[sales.dept_id.isin(DEPTS)]

# Sort the wide table by its identifier columns.
sales = sales.sort_values(
    by=['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
)

# Melt every non-identifier column into a single 'sales' column; the former
# column name is stored in 'd'.
index_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
sales = pd.melt(
    sales,
    id_vars=index_columns,
    var_name='d',
    value_name='sales',
)

# Drop identifier columns that are not used by the aggregation below.
sales = sales.drop(['cat_id', 'state_id', 'id'], axis=1)

# Roll the data up to item level. The string "sum" is used instead of np.sum
# because newer pandas versions warn when a NumPy callable is passed to agg.
sales = (
    sales.groupby(['dept_id', 'item_id', 'd'])
    .agg({'sales': 'sum'})
    .reset_index()
)

# Strip the 'd_' text from 'd' (vectorized, literal match) and convert the
# remainder to a numeric dtype so the sort below is numeric, not lexical.
sales['d'] = pd.to_numeric(sales['d'].str.replace('d_', '', regex=False))
sales = sales.sort_values(by=['dept_id', 'item_id', 'd'], ignore_index=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment