Last active
January 5, 2022 16:12
-
-
Save h-okay/3a4bc23016018b820ac528e3c92eddcd to your computer and use it in GitHub Desktop.
data_prep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prepare the per-customer summary table (recency, T, frequency, monetary)
# from the transaction-level DataFrame `df`.

# NOTE(review): `replace_with_thresholds` is defined elsewhere; its return
# value is discarded here, so it presumably caps outliers in `df` in place
# -- confirm against its definition.
replace_with_thresholds(df, 'Price')
replace_with_thresholds(df, 'Quantity')

# Revenue of each transaction line.
df["TotalPrice"] = df["Quantity"] * df["Price"]

# Data we use is old. We need a proper analysis date.
today_date = dt.datetime(2011, 12, 11)

# One row per customer. The two 'InvoiceDate' aggregations must stay in this
# order because the resulting columns are renamed positionally below.
cltv_df = df.groupby('Customer ID').agg(
    {
        'InvoiceDate': [
            # Days between the customer's first and last purchase.
            lambda date: (date.max() - date.min()).days,
            # Days between the customer's first purchase and the analysis date.
            lambda date: (today_date - date.min()).days,
        ],
        # Number of distinct invoices.
        'Invoice': 'nunique',
        # Total revenue.
        'TotalPrice': 'sum',
    }
)

# Assigning flat names replaces the two-level column index produced by the
# dict-of-lists aggregation, so no separate droplevel() step is needed.
cltv_df.columns = ['recency', 'T', 'frequency', 'monetary']

# Average revenue per invoice.
cltv_df["monetary"] = cltv_df["monetary"] / cltv_df["frequency"]

# Keep only customers with more than one invoice. The explicit copy makes the
# column assignments below operate on an independent frame rather than on a
# slice of the previous one (avoids SettingWithCopyWarning).
cltv_df = cltv_df[cltv_df['frequency'] > 1].copy()

# Convert day counts to weeks.
cltv_df["recency"] = cltv_df["recency"] / 7
cltv_df["T"] = cltv_df["T"] / 7  # Tenure on weekly scale

# Customer IDs as integer index labels.
cltv_df.index = cltv_df.index.astype('int64')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment