Created
February 11, 2017 01:38
-
-
Save caffeine-potent/b88589af2574478a1b511fe45d5b2e4a to your computer and use it in GitHub Desktop.
Softmax Annealing!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random | |
def categorical_draw(probs):
    """Draw a random index according to a discrete probability distribution.

    Example: with ``probs = [0.5, 0.2, 0.3]`` index 0 is returned about half
    of the time, index 1 about 20% of the time and index 2 about 30% of the
    time.

    A uniform random number in [0, 1) is drawn and the probabilities are
    accumulated left to right; the first index whose cumulative probability
    exceeds the draw is selected.

    Args:
        probs: Sequence of non-negative probabilities, expected to sum to 1.

    Returns:
        The integer index of the selected entry of ``probs``.

    Raises:
        ValueError: If ``probs`` is empty.
    """
    if len(probs) == 0:
        raise ValueError("probs must contain at least one probability")

    z = random.random()
    cum_prob = 0.0
    for i, prob in enumerate(probs):
        cum_prob += prob
        if cum_prob > z:
            return i

    # Floating-point rounding (or weights that sum to slightly less than 1)
    # can leave the cumulative total at or below the draw. Fall back to the
    # last index instead of implicitly returning None.
    return len(probs) - 1
class Softmax_Annealing_Bandit_Algorithm:
    """Softmax (Boltzmann) multi-armed bandit with an annealed temperature.

    Each arm is chosen with probability proportional to
    ``exp(value / temperature)``. The temperature is recomputed on every
    selection as ``1 / log(t)``, where ``t`` is one more than the total
    number of pulls recorded in ``counts``; as pulls accumulate the
    temperature falls and selection concentrates on the arms with the
    highest estimated value.

    Attributes:
        temperature: The value passed to the constructor. It is stored
            only; ``select_arm`` derives its own temperature from the
            pull counts.
        counts: Per-arm number of recorded pulls.
        values: Per-arm running average of observed rewards.
    """

    def __init__(self, temperature, counts, values):
        """Store the initial per-arm statistics.

        Args:
            temperature: Kept as an attribute; not used by ``select_arm``.
            counts: List with the number of pulls per arm.
            values: List with the average reward per arm.
        """
        self.temperature = temperature
        self.counts = counts
        self.values = values

    def initialize(self, n_arms):
        """Reset the statistics for ``n_arms`` arms.

        Args:
            n_arms: Number of arms; every count becomes 0 and every value 0.0.
        """
        self.counts = [0] * n_arms
        self.values = [0.0] * n_arms

    def select_arm(self):
        """Pick an arm by sampling the annealed softmax distribution.

        Returns:
            The index of the chosen arm, as returned by ``categorical_draw``.

        Raises:
            ValueError: If there are no arms (``values`` is empty).
        """
        # The annealing clock is the number of pulls so far, not the sum of
        # the average rewards: only the pull count grows over time.
        t = sum(self.counts) + 1
        # The small offset keeps log() away from exactly 0 when t == 1,
        # which would otherwise cause a division by zero.
        temperature = 1 / math.log(t + 0.0000001)

        # Subtracting the largest value leaves the softmax probabilities
        # unchanged but keeps every exponent <= 0, so exp() cannot overflow.
        peak = max(self.values)
        weights = [math.exp((v - peak) / temperature) for v in self.values]
        z = sum(weights)
        probs = [w / z for w in weights]
        return categorical_draw(probs)

    def update(self, chosen_arm, reward):
        """Record a reward for an arm and refresh its running average.

        Args:
            chosen_arm: Index of the arm that was pulled.
            reward: Reward observed for that pull.
        """
        self.counts[chosen_arm] += 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Incremental mean: weight the old average by (n - 1) / n and the
        # new observation by 1 / n.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment