Skip to content

Instantly share code, notes, and snippets.

@erikbern
Last active January 15, 2021 00:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save erikbern/65660dc8aa5df99b5f84 to your computer and use it in GitHub Desktop.
Save erikbern/65660dc8aa5df99b5f84 to your computer and use it in GitHub Desktop.
MCMC for simple marketing data
import pymc, pymc.graph
import matplotlib.pyplot as plt
import numpy as np
import sys
channels = [
('A', 2292.04, 9),
('B', 1276.85, 2),
('C', 139.59, 3),
('D', 954.98, 5)
]
n_burn, n_samples = 1000, int(sys.argv[1])
for channel, spend_obs, transactions_obs in channels:
# Let's say the cost per lead is uniform between 0 and 100000
# pymc forces you to pick an upper bound, so let's pick a silly one
c = pymc.Uniform('c', 0, 100000)
# Now let's assume the *expected* number of transactions is really just
# the total number of money spent divided by the cost per lead
@pymc.deterministic
def e(c=c):
return spend_obs / c
# The observed number of transactions is a Poisson with mu set to the expectations
a = pymc.Poisson('a', mu=e, observed=True, value=transactions_obs)
model = pymc.MCMC([c, e, a])
graph = pymc.graph.graph(model)
graph.write_png("graph.png")
model.sample(n_samples, n_burn)
ys, xs = np.histogram(model.trace('c')[:], range=(0, 500), bins=100)
xs = (xs[1:] + xs[:-1])/2
plt.plot(xs, ys)
plt.legend([c for c, _, _ in channels])
plt.gca()
plt.title('%d samples (%d burn)' % (n_samples, n_burn))
plt.xlabel('Cost per transaction')
plt.ylabel('Probability density')
plt.savefig('marketing_mc_%09d.png' % n_samples)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment