Created
February 11, 2017 01:27
-
-
Save caffeine-potent/9e79fe52d28abe8f6c9e38783e1271d4 to your computer and use it in GitHub Desktop.
Softmax!!!!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random | |
def categorical_draw(probs):
    """Draw an index at random according to a discrete distribution.

    Example: with ``probs = [.5, .2, .3]`` index 0 is returned about half
    of the time, index 1 about 20% of the time and index 2 about 30%.

    A single uniform sample ``z`` in [0, 1) is compared against the running
    cumulative probability; the first index whose cumulative probability
    exceeds ``z`` is returned.

    Args:
        probs: Sequence of probabilities, expected to sum to 1.

    Returns:
        The index of the drawn entry.

    Raises:
        ValueError: If ``probs`` is empty.
    """
    if len(probs) == 0:
        raise ValueError("probs must contain at least one probability")

    z = random.random()
    cum_prob = 0.0
    for i, prob in enumerate(probs):
        cum_prob += prob
        if cum_prob > z:
            return i

    # Floating-point rounding can leave the cumulative sum marginally below
    # the sample; fall back to the last index rather than returning None.
    return len(probs) - 1
class Softmax_Bandit_Algorithm:
    """Softmax (Boltzmann) arm selection for a multi-armed bandit.

    Each arm is chosen with probability proportional to
    ``exp(value / temperature)``, where ``value`` is the running average
    reward observed for that arm.
    """

    def __init__(self, temperature, counts, values):
        """Store the temperature and the per-arm statistics.

        Args:
            temperature: Divisor applied to each value before
                exponentiation; must be non-zero.
            counts: Per-arm number of updates received so far.
            values: Per-arm running average reward.
        """
        self.temperature = temperature
        self.counts = counts
        self.values = values

    def initialize(self, n_arms):
        """Reset the statistics to zero for ``n_arms`` arms."""
        self.counts = [0 for _ in range(n_arms)]
        self.values = [0.0 for _ in range(n_arms)]

    def _arm_probabilities(self):
        """Return the softmax probabilities of the current arm values."""
        if len(self.values) == 0:
            return []
        scaled = [v / self.temperature for v in self.values]
        # Subtracting the maximum leaves the softmax unchanged mathematically
        # but keeps every exponent <= 0, so math.exp cannot overflow.
        offset = max(scaled)
        weights = [math.exp(s - offset) for s in scaled]
        z = sum(weights)
        return [w / z for w in weights]

    def select_arm(self):
        """Return the index of an arm drawn from the softmax distribution."""
        return categorical_draw(self._arm_probabilities())

    def update(self, chosen_arm, reward):
        """Fold ``reward`` into the running average of ``chosen_arm``.

        Args:
            chosen_arm: Index of the arm that was played.
            reward: Reward observed for that play.
        """
        self.counts[chosen_arm] = self.counts[chosen_arm] + 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Weighted average of the previous mean (n - 1 samples) and the new
        # reward; float() keeps the division exact under Python 2 as well.
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[chosen_arm] = new_value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment