Skip to content

Instantly share code, notes, and snippets.

@dhananjaychaudhari26
Last active February 2, 2021 16:19
Show Gist options
  • Save dhananjaychaudhari26/5d342d4b19f1d78c2c0013e018815039 to your computer and use it in GitHub Desktop.
Save dhananjaychaudhari26/5d342d4b19f1d78c2c0013e018815039 to your computer and use it in GitHub Desktop.
# Most frequent element of an iterable of hashable values. On ties the result
# is whichever entry Counter.most_common lists first; an empty iterable raises
# IndexError. Intentionally left as a lambda: its __name__ is "<lambda>", and
# the aggregation further down uses that name when building column labels.
mode = lambda values: Counter(values).most_common(1)[0][0]
# Aggregation spec handed to DataFrameGroupBy.agg: column name -> statistic(s).
# Insertion order is significant because it fixes the order of the output
# columns, so the groups below are listed in the order the columns must appear.
aggs = {
    column: list(stats)
    for stats, columns in (
        (('sum', 'max', 'min', 'mean', 'var', 'skew'), ('purchase_amount',)),
        (('sum', 'max', 'mean', 'var', 'skew'), ('installments',)),
        (('max', 'min'), ('purchase_date',)),
        (('max', 'min', 'mean', 'var', 'skew'), ('month_lag', 'month_diff')),
        (('sum', 'mean'), ('weekend', 'weekday', 'authorized_flag')),
        (('sum', 'mean', 'max', 'min'),
         ('category_1', 'category_2', 'category_3')),
        (('size', 'count'), ('card_id',)),
        (('nunique', 'mean', 'min', 'max'),
         ('month', 'hour', 'weekofyear', 'day')),
        (('nunique',),
         ('subsector_id', 'merchant_id', 'merchant_category_id')),
        (('sum', 'mean', 'max', 'min', 'var', 'skew'), ('price',)),
        (('mean', 'min', 'max', 'var', 'skew'),
         ('duration', 'amount_month_ratio')),
        # Holiday columns -- presumably per-transaction indicator/proximity
        # features built upstream (TODO confirm); only their mean is kept.
        (('mean',),
         ('Christmas_Day_2017',
          'Mothers_Day_2017',
          'fathers_day_2017',
          'Children_day_2017',
          'Valentine_Day_2017',
          'Black_Friday_2017',
          'Mothers_Day_2018')),
    )
    for column in columns
}

# "merchants_"-prefixed columns (presumably merged in from a merchants table
# -- verify upstream) each get a single reducer: the most frequent value via
# `mode`, or the mean. These stay scalars rather than lists, as before.
aggs.update((
    ("merchants_merchant_group_id", mode),
    ("merchants_merchant_category_id", mode),
    ("merchants_subsector_id", mode),
    ("merchants_numerical_1", "mean"),
    ("merchants_numerical_2", "mean"),
    ("merchants_category_1", mode),
    ("merchants_most_recent_sales_range", mode),
    ("merchants_most_recent_purchases_range", mode),
    ("merchants_avg_sales_lag3", "mean"),
    ("merchants_avg_purchases_lag3", "mean"),
    ("merchants_active_months_lag3", mode),
    ("merchants_avg_sales_lag6", "mean"),
    ("merchants_avg_purchases_lag6", "mean"),
    ("merchants_active_months_lag6", mode),
    ("merchants_avg_sales_lag12", "mean"),
    ("merchants_avg_purchases_lag12", "mean"),
    ("merchants_active_months_lag12", mode),
    ("merchants_category_4", mode),
    ("merchants_city_id", mode),
    ("merchants_state_id", mode),
    ("merchants_category_2", mode),
))
# Collapse the transaction rows in hist_trans_df to one row per card_id,
# applying the per-column statistics listed in `aggs`. reset_index() moves the
# current index into ordinary columns before grouping.
hist_trans_df = (
    hist_trans_df
    .reset_index()
    .groupby('card_id')
    .agg(aggs)
)
# The aggregated frame has two-level column labels (source column, statistic);
# flatten each pair to "hist_<column>_<statistic>".
hist_trans_df.columns = [
    'hist_' + column + "_" + stat
    for column, stat in hist_trans_df.columns.tolist()
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment