Instantly share code, notes, and snippets.

# stucchio/beta_bandit.py Last active Mar 26, 2018

Beta-distribution Bandit
 from numpy import * from scipy.stats import beta class BetaBandit(object): def __init__(self, num_options=2, prior=(1.0,1.0)): self.trials = zeros(shape=(num_options,), dtype=int) self.successes = zeros(shape=(num_options,), dtype=int) self.num_options = num_options self.prior = prior def add_result(self, trial_id, success): self.trials[trial_id] = self.trials[trial_id] + 1 if (success): self.successes[trial_id] = self.successes[trial_id] + 1 def get_recommendation(self): sampled_theta = [] for i in range(self.num_options): #Construct beta distribution for posterior dist = beta(self.prior+self.successes[i], self.prior+self.trials[i]-self.successes[i]) #Draw sample from beta distribution sampled_theta += [ dist.rvs() ] # Return the index of the sample with the largest value return sampled_theta.index( max(sampled_theta) )
 from beta_bandit import * from numpy import * from scipy.stats import beta import random theta = (0.25, 0.35) def is_conversion(title): if random.random() < theta[title]: return True else: return False conversions = [0,0] trials = [0,0] N = 100000 trials = zeros(shape=(N,2)) successes = zeros(shape=(N,2)) bb = BetaBandit() for i in range(N): choice = bb.get_recommendation() trials[choice] = trials[choice]+1 conv = is_conversion(choice) bb.add_result(choice, conv) trials[i] = bb.trials successes[i] = bb.successes from pylab import * subplot(211) n = arange(N)+1 loglog(n, trials[:,0], label="title 0") loglog(n, trials[:,1], label="title 1") legend() xlabel("Number of trials") ylabel("Number of trials/title") subplot(212) semilogx(n, (successes[:,0]+successes[:,1])/n, label="CTR") semilogx(n, zeros(shape=(N,))+0.35, label="Best CTR") semilogx(n, zeros(shape=(N,))+0.30, label="Random chance CTR") semilogx(n, zeros(shape=(N,))+0.25, label="Worst CTR") axis([0,N,0.15,0.45]) xlabel("Number of trials") ylabel("CTR") legend() show()

### jvcodell commented Mar 26, 2018 • edited

 In the iteration over time steps: for i in range(N): choice = bb.get_recommendation() trials[choice] = trials[choice]+1 conv = is_conversion(choice) bb.add_result(choice, conv) trials[i] = bb.trials successes[i] = bb.successes Can you explain what the code, " trials[choice] = trials[choice]+1" does? the first dimension of trials is over the time horizon, isn't it? I'm confused why you do that, then set "trials[i] = bb.trials" later on. It seems like the " trials[i] = bb.trials" is all that's needed since you really just want to record what action has been tried in this time step... is that right? Thanks in advance.