Created
January 25, 2023 13:20
-
-
Save tomsen-san/66ec9e20594dc89ce73ad77283ff05a0 to your computer and use it in GitHub Desktop.
pymc model and data generator for analyzing runtime behaviour of pymc3 vs pymc5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import pandas as pd | |
from scipy.stats import norm, lognorm, halfnorm, nbinom | |
## generates data NBD distributed. We have a variable number of deltas between purchase events in days per customer | |
num_customers = 10000 # generate data for 10k customers at once | |
data = [] | |
num_deltas_dist = halfnorm(loc=10, scale=10) | |
mu_mu_dist = norm(loc=3, scale=0.5) | |
mu_sigma_dist = halfnorm(scale=0.1) | |
alpha_sigma_dist = halfnorm(scale=0.3) | |
for customer in range(num_customers): | |
mu_mu = mu_mu_dist.rvs() | |
mu_sigma = mu_sigma_dist.rvs() | |
alpha_sigma = alpha_sigma_dist.rvs() | |
mu_dist = lognorm(s=mu_sigma, scale=np.exp(mu_mu)) | |
alpha_dist = halfnorm(loc=2.5, scale=alpha_sigma) | |
mu = mu_dist.rvs() | |
alpha = alpha_dist.rvs() | |
p = alpha / (mu + alpha) | |
n = alpha | |
customer_nbd = nbinom(n, p) | |
num_deltas = int(num_deltas_dist.rvs()) | |
deltas = customer_nbd.rvs(size=num_deltas) | |
for delta in deltas: | |
data.append({ | |
'customerId': customer, | |
'delta': delta+1, | |
}) | |
generated = pd.DataFrame(data=data) | |
## model | |
import pymc3, time | |
def generate_pymc3_times(num_customers_list, data): | |
times = [] | |
for num_customers in num_customers_list: | |
selected_customers = range(num_customers) | |
select_cd = data[data.customerId.isin(selected_customers)].copy() | |
with pymc3.Model() as deltas_centered_hierarchical_model: | |
# hyper priors | |
mu_mu = pymc3.Normal("μ_mu", mu=3, sigma=1) | |
mu_sigma = pymc3.HalfNormal("μ_sigma", sigma=1) | |
alpha_sigma = pymc3.HalfNormal("α_sigma", sigma=1) | |
# model | |
mu_c = pymc3.Lognormal('μ', mu=mu_mu, sigma=mu_sigma, shape=num_customers) | |
alpha_c = pymc3.HalfNormal('α', sigma=alpha_sigma, shape=num_customers) | |
delta_customer = pymc3.NegativeBinomial("Δ", mu=mu_c[select_cd.customer_idx.values], alpha=alpha_c[select_cd.customer_idx.values], observed=select_cd['delta']) | |
start_time = round(time.time()) | |
trace_partial_pooled = pymc3.sample(random_seed=2412, chains=4, draws=2000, return_inferencedata=False) | |
prior_partial_pooled = pymc3.sample_prior_predictive() | |
posterior_predictive_partial_pooled = pymc3.sample_posterior_predictive(trace_partial_pooled) | |
end_time = round(time.time()) | |
times.append(end_time - start_time) | |
return times | |
generated["customer_idx"] = generated["customerId"] | |
num_customers_list = [100, 500, 1000, 2000, 3000, 4000] | |
pymc3_times = generate_pymc3_times(num_customers_list, generated) | |
## same for pymc5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment