Skip to content

Instantly share code, notes, and snippets.

@tomsen-san
Created January 25, 2023 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomsen-san/66ec9e20594dc89ce73ad77283ff05a0 to your computer and use it in GitHub Desktop.
Save tomsen-san/66ec9e20594dc89ce73ad77283ff05a0 to your computer and use it in GitHub Desktop.
pymc model and data generator for analyzing runtime behaviour of pymc3 vs pymc5
import os
import numpy as np
import pandas as pd
from scipy.stats import norm, lognorm, halfnorm, nbinom
## generates data NBD distributed. We have a variable number of deltas between purchase events in days per customer
num_customers = 10000 # generate data for 10k customers at once
data = []
num_deltas_dist = halfnorm(loc=10, scale=10)
mu_mu_dist = norm(loc=3, scale=0.5)
mu_sigma_dist = halfnorm(scale=0.1)
alpha_sigma_dist = halfnorm(scale=0.3)
for customer in range(num_customers):
mu_mu = mu_mu_dist.rvs()
mu_sigma = mu_sigma_dist.rvs()
alpha_sigma = alpha_sigma_dist.rvs()
mu_dist = lognorm(s=mu_sigma, scale=np.exp(mu_mu))
alpha_dist = halfnorm(loc=2.5, scale=alpha_sigma)
mu = mu_dist.rvs()
alpha = alpha_dist.rvs()
p = alpha / (mu + alpha)
n = alpha
customer_nbd = nbinom(n, p)
num_deltas = int(num_deltas_dist.rvs())
deltas = customer_nbd.rvs(size=num_deltas)
for delta in deltas:
data.append({
'customerId': customer,
'delta': delta+1,
})
generated = pd.DataFrame(data=data)
## model
import pymc3, time
def generate_pymc3_times(num_customers_list, data):
times = []
for num_customers in num_customers_list:
selected_customers = range(num_customers)
select_cd = data[data.customerId.isin(selected_customers)].copy()
with pymc3.Model() as deltas_centered_hierarchical_model:
# hyper priors
mu_mu = pymc3.Normal("μ_mu", mu=3, sigma=1)
mu_sigma = pymc3.HalfNormal("μ_sigma", sigma=1)
alpha_sigma = pymc3.HalfNormal("α_sigma", sigma=1)
# model
mu_c = pymc3.Lognormal('μ', mu=mu_mu, sigma=mu_sigma, shape=num_customers)
alpha_c = pymc3.HalfNormal('α', sigma=alpha_sigma, shape=num_customers)
delta_customer = pymc3.NegativeBinomial("Δ", mu=mu_c[select_cd.customer_idx.values], alpha=alpha_c[select_cd.customer_idx.values], observed=select_cd['delta'])
start_time = round(time.time())
trace_partial_pooled = pymc3.sample(random_seed=2412, chains=4, draws=2000, return_inferencedata=False)
prior_partial_pooled = pymc3.sample_prior_predictive()
posterior_predictive_partial_pooled = pymc3.sample_posterior_predictive(trace_partial_pooled)
end_time = round(time.time())
times.append(end_time - start_time)
return times
generated["customer_idx"] = generated["customerId"]
num_customers_list = [100, 500, 1000, 2000, 3000, 4000]
pymc3_times = generate_pymc3_times(num_customers_list, generated)
## same for pymc5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment