import numpy as np
import pymc3 as pm
from pymc3.math import exp, log, where
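
# For context, `rfm_cal_holdout` below is an RFM summary table with separate
# calibration and holdout columns. A minimal, hypothetical sketch of how such
# a table could be built with the lifetimes library follows; the transactions
# DataFrame and its column names ('customer_id', 'date'), as well as the two
# period-end dates, are illustrative assumptions, not part of this gist:
#
# from lifetimes.utils import calibration_and_holdout_data
# rfm_cal_holdout = calibration_and_holdout_data(
#     transactions_df,
#     customer_id_col='customer_id',
#     datetime_col='date',
#     calibration_period_end='2014-09-01',
#     observation_period_end='2014-12-31',
# )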
# We use the "calibration" portion of the dataset to train the model
N = rfm_cal_holdout.shape[0] # number of customers
x = rfm_cal_holdout['frequency_cal'].values # repeat purchase frequency
t_x = rfm_cal_holdout['recency_cal'].values # recency
T = rfm_cal_holdout['T_cal'].values # time since first purchase (T)
# Modeling step
bgnbd_model = pm.Model()

with bgnbd_model:
    # Priors for r and alpha, the two parameters of the Gamma distribution
    r = pm.TruncatedNormal('r', mu=8, sigma=7, lower=0, upper=40)
    alpha = pm.TruncatedNormal('alpha', mu=0.5, sigma=5, lower=0, upper=10)

    # Priors for a and b, the two parameters of the Beta distribution
    a = pm.TruncatedNormal('a', mu=1, sigma=5, lower=0, upper=10)
    b = pm.TruncatedNormal('b', mu=1, sigma=5, lower=0, upper=10)

    # lambda_ (the per-customer purchase rate) is drawn from a Gamma
    # distribution parameterized by r and alpha
    lambda_ = pm.Gamma('lambda', alpha=r, beta=alpha, shape=N, testval=np.random.rand(N))

    # p (the per-customer dropout probability) is drawn from a Beta
    # distribution parameterized by a and b
    p = pm.Beta('p', alpha=a, beta=b, shape=N, testval=np.random.rand(N))

    def logp(x, t_x, T):
        """
        BG-NBD log-likelihood. The first term (A1) covers customers who are
        still "alive" at time T; the second term (A2) covers customers who
        dropped out right after their last purchase at t_x, which is only
        possible when x > 0 (hence the delta_x indicator).
        """
        delta_x = where(x > 0, 1, 0)
        A1 = x * log(1 - p) + x * log(lambda_) - lambda_ * T
        A2 = log(p) + (x - 1) * log(1 - p) + x * log(lambda_) - lambda_ * t_x
        A3 = log(exp(A1) + delta_x * exp(A2))
        return A3

    # Custom distribution for the BG-NBD likelihood function; the entries of
    # the observed dict are passed to logp as keyword arguments
    loglikelihood = pm.DensityDist("loglikelihood", logp, observed={'x': x, 't_x': t_x, 'T': T})

# Sampling step
SEED = 8
SAMPLE_KWARGS = {
    'chains': 1,
    'draws': 4000,
    'tune': 1000,
    'target_accept': 0.7,
    'random_seed': [
        SEED,
    ],
}
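# An aside on these settings: with a single chain, cross-chain convergence
# diagnostics are weaker; running multiple chains (e.g. 'chains': 4, with one
# seed per chain in 'random_seed') is the standard way to assess convergence.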

with bgnbd_model:
    trace = pm.sample(**SAMPLE_KWARGS)

# It's good practice to burn (discard) the early samples: they were likely
# drawn before the sampler converged, so they aren't representative of our
# posteriors.
trace_trunc = trace[3000:]
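
# As a quick sanity check (a minimal sketch, assuming a recent pymc3 whose
# pm.summary wraps arviz.summary), one could report posterior means, credible
# intervals, and diagnostics for the four population-level parameters:
print(pm.summary(trace_trunc, var_names=['r', 'alpha', 'a', 'b']))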