import numpy as np
import pymc3 as pm
from pymc3.math import exp, log, where
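
# For context, `rfm_cal_holdout` below is an RFM summary table with separate
# calibration and holdout columns. A minimal, hypothetical sketch of how such
# a table could be built with the lifetimes library follows; the transactions
# DataFrame and its column names ('customer_id', 'date'), as well as the two
# period-end dates, are illustrative assumptions, not part of this gist:
#
# from lifetimes.utils import calibration_and_holdout_data
# rfm_cal_holdout = calibration_and_holdout_data(
#     transactions_df,
#     customer_id_col='customer_id',
#     datetime_col='date',
#     calibration_period_end='2014-09-01',
#     observation_period_end='2014-12-31',
# )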
# We use the "calibration" portion of the dataset to train the model
N = rfm_cal_holdout.shape[0] # number of customers
x = rfm_cal_holdout['frequency_cal'].values # repeat purchase frequency
t_x = rfm_cal_holdout['recency_cal'].values # recency
T = rfm_cal_holdout['T_cal'].values # time since first purchase (T)
# Modeling step
bgnbd_model = pm.Model()

with bgnbd_model:
    # Priors for r and alpha, the two parameters of the Gamma distribution
    r = pm.TruncatedNormal('r', mu=8, sigma=7, lower=0, upper=40)
    alpha = pm.TruncatedNormal('alpha', mu=0.5, sigma=5, lower=0, upper=10)

    # Priors for a and b, the two parameters of the Beta distribution
    a = pm.TruncatedNormal('a', mu=1, sigma=5, lower=0, upper=10)
    b = pm.TruncatedNormal('b', mu=1, sigma=5, lower=0, upper=10)

    # lambda_ (the per-customer purchase rate) is drawn from a Gamma
    # distribution parameterized by r and alpha
    lambda_ = pm.Gamma('lambda', alpha=r, beta=alpha, shape=N, testval=np.random.rand(N))

    # p (the per-customer dropout probability) is drawn from a Beta
    # distribution parameterized by a and b
    p = pm.Beta('p', alpha=a, beta=b, shape=N, testval=np.random.rand(N))

    def logp(x, t_x, T):
        """
        BG-NBD log-likelihood. The first term (A1) covers customers who are
        still "alive" at time T; the second term (A2) covers customers who
        dropped out right after their last purchase at t_x, which is only
        possible when x > 0 (hence the delta_x indicator).
        """
        delta_x = where(x > 0, 1, 0)
        A1 = x * log(1 - p) + x * log(lambda_) - lambda_ * T
        A2 = log(p) + (x - 1) * log(1 - p) + x * log(lambda_) - lambda_ * t_x
        A3 = log(exp(A1) + delta_x * exp(A2))
        return A3

    # Custom distribution for the BG-NBD likelihood function; the entries of
    # the observed dict are passed to logp as keyword arguments
    loglikelihood = pm.DensityDist("loglikelihood", logp, observed={'x': x, 't_x': t_x, 'T': T})

# Sampling step
SEED = 8
SAMPLE_KWARGS = {
    'chains': 1,
    'draws': 4000,
    'tune': 1000,
    'target_accept': 0.7,
    'random_seed': [
        SEED,
    ],
}
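# An aside on these settings: with a single chain, cross-chain convergence
# diagnostics are weaker; running multiple chains (e.g. 'chains': 4, with one
# seed per chain in 'random_seed') is the standard way to assess convergence.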

with bgnbd_model:
    trace = pm.sample(**SAMPLE_KWARGS)

# It's good practice to burn (discard) the early samples: they were likely
# drawn before the sampler converged, so they aren't representative of our
# posteriors.
trace_trunc = trace[3000:]
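
# As a quick sanity check (a minimal sketch, assuming a recent pymc3 whose
# pm.summary wraps arviz.summary), one could report posterior means, credible
# intervals, and diagnostics for the four population-level parameters:
print(pm.summary(trace_trunc, var_names=['r', 'alpha', 'a', 'b']))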