Skip to content

Instantly share code, notes, and snippets.

@conormm
Last active September 26, 2018 09:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save conormm/6d72fea77169d9b21c91269868d6a83d to your computer and use it in GitHub Desktop.
# Notebook setup: numeric/plotting stack plus Jupyter-only conveniences.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set_style("whitegrid")
# NOTE: the next three lines only work inside IPython/Jupyter — they enable
# inline plots and widen the notebook display container to 80% of the page.
get_ipython().run_line_magic('matplotlib', 'inline')
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
class Environment:
    """A k-armed Bernoulli bandit environment.

    Each of the ``variants`` pays out 1 with its corresponding probability
    from ``payouts`` and 0 otherwise. ``run`` plays an agent against the
    environment for ``n_trials`` rounds and accumulates the total reward.
    """

    def __init__(self, variants, payouts, n_trials):
        self.variants = variants
        self.payouts = payouts
        self.n_trials = n_trials
        self.total_reward = 0
        # Number of arms and the (arms, trials) layout used by agents.
        self.n_k = len(variants)
        self.shape = (self.n_k, n_trials)

    def run(self, agent):
        """Run the simulation with the agent.
        agent must be a class with choose_k and update methods."""
        for trial in range(self.n_trials):
            # Agent picks an arm, environment samples a Bernoulli reward
            # for that arm, then the agent observes it and updates.
            chosen_arm = agent.choose_k()
            reward = np.random.binomial(1, p=self.payouts[chosen_arm])
            agent.reward = reward
            agent.update()
            self.total_reward += reward
        agent.collect_data()
        return self.total_reward
class ThompsonSampler:
def __init__(self, env):
self.env = env
self.n_samples = 100
self.shape = (env.n_k, self.n_samples)
self.variants = env.variants
self.n_trials = env.n_trials
self.payouts = env.payouts
self.ad_i = np.zeros(env.n_trials)
self.r_i = np.zeros(env.n_trials)
self.regret_i = np.zeros(env.n_trials)
self.total_reward = 0
self.a = np.ones(env.n_k)
self.b = np.ones(env.n_k)
self.beta_post = np.random.uniform(0, 1, size=self.shape)
self.thetam = np.zeros(env.n_k)
self.data = None
self.reward = 0
self.k = 0
self.i = 0
def choose_k(self):
self.beta_post[self.k, :] = np.random.beta(self.a[self.k], self.b[self.k], size=self.shape)[self.k]
for self.k in range(self.env.n_k):
# sample from posterior (this is the thompson sampling approach)
# this leads to more exploration because machines with > uncertainty can then be selected as the machine
#xpost[k, :] = xpost[k, :][np.round(self.beta_post[k, :], 3) != 0]
self.thetam[self.k] = np.random.choice(self.beta_post[self.k, :])
# select machine with highest posterior p of payout
self.k = self.variants[np.argmax(self.thetam)]
return self.k
def update(self):
self.regret_i[self.i] = np.max(self.beta_post) - self.thetam[self.k]
#update dist (a, b) = (a, b) + (r, 1 - r)
self.a[self.k] += self.reward
self.b[self.k] += 1 - self.reward # i.e. only increment b when it's a swing and a miss. 1 - 0 = 1, 1 - 1 = 0
self.total_reward += self.reward
self.ad_i[self.i] = self.k
self.r_i[self.i] = self.reward
self.i += 1
def collect_data(self):
self.data = pd.DataFrame(dict(ad=self.ad_i, reward=self.r_i, regret=self.regret_i))
# Eleven ads/machines; the last one (index 10) pays out far more often
# than the rest, so a good agent should converge on it.
machines = list(range(11))
payouts = [0.023, 0.001, 0.029, 0.001, 0.002, 0.04, 0.0234, 0.002, 0.01, 0.0121, 0.3]

# Play a Thompson-sampling agent for 10,000 rounds.
en = Environment(machines, payouts, 10000)
tsa = ThompsonSampler(env=en)
en.run(agent=tsa)
plt.figure(figsize=(22, 14))

# Plot 1: the prior — a fresh sampler whose beta_post cache is still the
# uniform initialisation (no rounds are run before plotting).
plt.subplot(231)
en = Environment(machines, payouts, 0)
tsa = ThompsonSampler(env=en)
for i in range(len(machines)):
    sns.distplot(tsa.beta_post[i], hist=False, label=str(i))
plt.title("Prior distribution for each variant (uniform between 0 and 1)")
plt.legend()

# Plots 2-6: posteriors after increasing numbers of rounds.
# BUGFIX: the original assigned n_rounds AFTER constructing each
# Environment, so every panel ran the previous panel's round count and
# carried a mismatched title (e.g. 0 rounds labelled "after 500").
# Setting n_rounds first makes each simulation match its title.
for subplot_pos, n_rounds in zip(range(232, 237), [500, 1000, 5000, 10000, 20000]):
    en = Environment(machines, payouts, n_rounds)
    tsa = ThompsonSampler(env=en)
    en.run(agent=tsa)
    plt.subplot(subplot_pos)
    for i in range(len(machines)):
        sns.distplot(tsa.beta_post[i], hist=False, label=str(i))
    plt.title(f"Beta distributions after {n_rounds}")
    plt.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.