Skip to content

Instantly share code, notes, and snippets.

@h-okay
h-okay / compare.csv
Created January 4, 2022 10:42
for medium
CustomerID RFM Segment Recency T Frequency Monetary 1M CLV 1M Scaled CLV 12M CLV 12M Scaled CLV
16000 potential_loyalists 0.0 0.428571 3 2055.786667 3843.408761 0.258216 39233.195047 0.239831
15061 champions 52.57143 53.28571 48 1108.30781 3670.30953 0.24659 40347.77563 0.24664
@h-okay
h-okay / cltv.py
Last active January 5, 2022 16:12
segments
# Cut the 6-month scaled CLV into four quantile-based bins. Labels are applied
# in ascending bin order, so the lowest bin becomes "D" and the highest "A".
final["cltv_segment"] = pd.qcut(
    final["6m_scaled_clv"],
    q=4,
    labels=["D", "C", "B", "A"],
)
@h-okay
h-okay / cltv.py
Last active January 5, 2022 16:12
add_clv
# Merging RFM: keep only the RFM segment column and join it with the CLV table
# on 'Customer ID' (pandas' default inner join).
final = rfm[['rfm_segment']].merge(
    cltv_final,
    on='Customer ID',
)
# Give the base CLV columns a "6m_" prefix.
# NOTE(review): presumably cltv_final was computed for a 6-month horizon and
# the prefix keeps it apart from the columns add_clv adds below -- confirm.
final = final.rename(
    columns={'scaled_clv': '6m_scaled_clv', 'clv': '6m_clv'},
)
# add_clv is defined elsewhere; the last argument is presumably the horizon in
# months (1 and 12 here) -- confirm against its definition.
final = add_clv(final, cltv_df, 1)
final = add_clv(final, cltv_df, 12)
@h-okay
h-okay / cltv.py
Last active January 5, 2022 16:12
adding_rfm
# Creating RFM: build the table with create_rfm (defined elsewhere) and rename
# its 'segment' column to 'rfm_segment'.
rfm = create_rfm(df).rename(columns={'segment': 'rfm_segment'})
# Cast the index to int64.
# NOTE(review): presumably so the customer ids match the key dtype used when
# this table is merged later -- confirm.
rfm.index = rfm.index.astype('int64')
@h-okay
h-okay / cltv.py
Last active January 5, 2022 16:12
modelling
# Models
# NOTE(review): keyword names below assume BetaGeoFitter / GammaGammaFitter
# come from the `lifetimes` package -- confirm against the file's imports.

# First model: fitted on the per-customer frequency, recency and T columns.
bgf = BetaGeoFitter(penalizer_coef=0.001)
bgf.fit(
    frequency=cltv_df['frequency'],
    recency=cltv_df['recency'],
    T=cltv_df['T'],
)

# Second model: fitted on the per-customer frequency and monetary columns.
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(
    frequency=cltv_df['frequency'],
    monetary_value=cltv_df['monetary'],
)
cltv = ggf.customer_lifetime_value(bgf,
@h-okay
h-okay / cltv.py
Last active January 5, 2022 16:12
data_prep
# The data set is historical, so a fixed analysis date is used instead of the
# real current date.
today_date = dt.datetime(year=2011, month=12, day=11)

# replace_with_thresholds is defined elsewhere; presumably it caps outliers in
# the given column in place -- confirm. It must run before the revenue column
# is derived from the same two columns.
replace_with_thresholds(df, 'Price')
replace_with_thresholds(df, 'Quantity')

# Revenue per row.
df["TotalPrice"] = df["Quantity"] * df["Price"]
cltv_df = df.groupby('Customer ID').agg(
{'InvoiceDate': [lambda date: (date.max() - date.min()).days,
lambda date: (today_date - date.min()).days],
@h-okay
h-okay / level_based_eda.py
Last active December 20, 2021 14:28
EDA for Level Based
# Distinct SOURCE values and how often each occurs
df["SOURCE"].value_counts()
# Distinct PRICE values and how often each occurs
df["PRICE"].value_counts()
# Distinct COUNTRY values and how often each occurs
df["COUNTRY"].value_counts()
# Total Income Accumulated by Countries