Created
February 2, 2021 15:48
-
-
Save dhananjaychaudhari26/3a3b18e613e035e52059edf5200ca83c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Most frequent value within a group. Counter.most_common(1) returns a
# one-element list of (value, count) pairs, so [0][0] picks out the value.
# NOTE(review): kept as a lambda on purpose -- pandas presumably labels the
# resulting column level with the callable's __name__ ("<lambda>"), so turning
# this into a named function would rename the output columns. Confirm before
# changing.
mode = lambda x: Counter(x).most_common(1)[0][0]

# Aggregations to compute for each column, per card_id.
# Values are either a list of aggregation names, a single aggregation name,
# or the `mode` callable defined above.
aggs = {
    'purchase_amount': ['sum', 'max', 'min', 'mean', 'var', 'skew'],
    'installments': ['sum', 'max', 'mean', 'var', 'skew'],
    'purchase_date': ['max', 'min'],
    'month_lag': ['max', 'min', 'mean', 'var', 'skew'],
    'month_diff': ['max', 'min', 'mean', 'var', 'skew'],
    'weekend': ['sum', 'mean'],
    'weekday': ['sum', 'mean'],
    'authorized_flag': ['sum', 'mean'],
    'category_1': ['sum', 'mean', 'max', 'min'],
    'category_2': ['sum', 'mean', 'max', 'min'],
    'category_3': ['sum', 'mean', 'max', 'min'],
    'card_id': ['size', 'count'],
    'month': ['nunique', 'mean', 'min', 'max'],
    'hour': ['nunique', 'mean', 'min', 'max'],
    'weekofyear': ['nunique', 'mean', 'min', 'max'],
    'day': ['nunique', 'mean', 'min', 'max'],
    'subsector_id': ['nunique'],
    'merchant_id': ['nunique'],
    'merchant_category_id': ['nunique'],
    'price': ['sum', 'mean', 'max', 'min', 'var', 'skew'],
    'duration': ['mean', 'min', 'max', 'var', 'skew'],
    'amount_month_ratio': ['mean', 'min', 'max', 'var', 'skew'],
    'Christmas_Day_2017': ['mean'],
    'Mothers_Day_2017': ['mean'],
    'fathers_day_2017': ['mean'],
    'Children_day_2017': ['mean'],
    'Valentine_Day_2017': ['mean'],
    'Black_Friday_2017': ['mean'],
    'Mothers_Day_2018': ['mean'],
    "merchants_merchant_group_id": mode,
    "merchants_merchant_category_id": mode,
    "merchants_subsector_id": mode,
    "merchants_numerical_1": "mean",
    "merchants_numerical_2": "mean",
    "merchants_category_1": mode,
    "merchants_most_recent_sales_range": mode,
    "merchants_most_recent_purchases_range": mode,
    "merchants_avg_sales_lag3": "mean",
    "merchants_avg_purchases_lag3": "mean",
    "merchants_active_months_lag3": mode,
    "merchants_avg_sales_lag6": "mean",
    "merchants_avg_purchases_lag6": "mean",
    "merchants_active_months_lag6": mode,
    "merchants_avg_sales_lag12": "mean",
    "merchants_avg_purchases_lag12": "mean",
    "merchants_active_months_lag12": mode,
    "merchants_category_4": mode,
    "merchants_city_id": mode,
    "merchants_state_id": mode,
    "merchants_category_2": mode,
}

# Aggregate the new transactions per card. reset_index() moves the current
# index into regular columns before grouping (presumably so that card_id is
# available as a column -- confirm against where new_trans_df is built).
new_trans_df = new_trans_df.reset_index().groupby('card_id').agg(aggs)

# The aggregation yields two-level (column, aggregation) labels; flatten each
# pair to "new_<column>_<aggregation>" so every aggregated column gets a
# single, prefixed name.
new_trans_df.columns = [
    f"new_{column}_{aggregation}"
    for column, aggregation in new_trans_df.columns.tolist()
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment