Last active
May 15, 2021 11:39
-
-
Save nagataka/c02b9acd6e8a8d7696e09f8a129d3362 to your computer and use it in GitHub Desktop.
Epsilon-Greedy written in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class EpsilonGreedy(): | |
def __init__(self, epsilon, counts, values): | |
self.epsilon = epsilon # probability of explore | |
self.counts = counts # number of pulls for each arms | |
self.values = values # average amount of reward we've gotten from each arms | |
return | |
def initialize(self, n_arms): | |
self.counts = [0 for col in range(n_arms)] | |
self.values = [0.0 for col in range(n_arms)] | |
return | |
def ind_max(self, x): | |
m = max(x) | |
return x.index(m) | |
def select_arm(self): | |
if random.random() > self.epsilon: | |
return self.ind_max(self.values) | |
else: | |
return random.randrange(len(self.values)) | |
def update(self, chosen_arm, reward): | |
self.counts[chosen_arm] = self.counts[chosen_arm] + 1 | |
n = self.counts[chosen_arm] | |
value = self.values[chosen_arm] | |
new_value = ((n-1) / float(n)) * value + (1 / float(n)) * reward # weighted average of the previously estimated value and the reward we just received | |
self.values[chosen_arm] = new_value | |
return | |
class BernoulliArm(): | |
def __init__(self, p): | |
self.p = p | |
def draw(self): | |
if random.random() > self.p: | |
return 0.0 | |
else: | |
return 1.0 | |
def test_algorithm(algo, arms, num_sims, horizon): | |
chosen_arms = [0.0 for i in range(num_sims * horizon)] | |
rewards = [0.0 for i in range(num_sims * horizon)] | |
cumulative_rewards = [0.0 for i in range(num_sims * horizon)] | |
sim_nums = [0.0 for i in range(num_sims * horizon)] | |
times = [0.0 for i in range(num_sims * horizon)] | |
for sim in range(num_sims): | |
sim = sim + 1 | |
algo.initialize(len(arms)) | |
for t in range(horizon): | |
t = t + 1 | |
index = (sim -1) * horizon + t - 1 #??? | |
sim_nums[index] = sim | |
times[index] = t | |
chosen_arm = algo.select_arm() | |
chosen_arms[index] = chosen_arm | |
reward = arms[chosen_arms[index]].draw() | |
rewards[index] = reward | |
if t == 1: | |
cumulative_rewards[index] = reward | |
else: | |
cumulative_rewards[index] = cumulative_rewards[index - 1] + reward | |
algo.update(chosen_arm, reward) | |
return [sim_nums, times, chosen_arms, rewards, cumulative_rewards] | |
random.seed(1) | |
means = [0.1, 0.1, 0.1, 0.1, 0.9] | |
n_arms = len(means) | |
random.shuffle(means) | |
arms = list(map(lambda mu: BernoulliArm(mu), means)) | |
# print("Best arm is "+ str(ind_max(means))) | |
f = open("~/algorithms/epsilon_greedy/standard_results.tsv", "w") | |
for epsilon in [0.1, 0.2, 0.3, 0.4, 0.5]: | |
algo = EpsilonGreedy(epsilon, [], []) | |
algo.initialize(n_arms) | |
results = test_algorithm(algo, arms, 5000, 250) | |
for i in range(len(results[0])): | |
f.write(str(epsilon) + "\t") | |
f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n") | |
f.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment