Skip to content

Instantly share code, notes, and snippets.

@caffeine-potent
Created February 10, 2017 20:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caffeine-potent/5111c57d475d8091f1433790fcb5fb3e to your computer and use it in GitHub Desktop.
Save caffeine-potent/5111c57d475d8091f1433790fcb5fb3e to your computer and use it in GitHub Desktop.
Epsilon Greedy from "Bandit Algorithms"
import numpy as np
class EpsGreedy:
    """Epsilon-greedy strategy for the multi-armed bandit problem.

    With probability ``epsilon`` a uniformly random arm is chosen (explore);
    otherwise the arm with the highest estimated value is chosen (exploit).

    Attributes:
        count: Float array with the number of updates recorded per arm.
        scores: Float array with the running-average reward per arm.
        epsilon: Probability of exploring on each call to ``select_arm``.
        bandit_count: Number of arms.
    """

    def __init__(self, number_of_bandits, epsilon, start_greedy=True):
        """Create the estimator state for ``number_of_bandits`` arms.

        Args:
            number_of_bandits: Number of arms to choose between.
            epsilon: Exploration probability, expected to lie in [0, 1].
            start_greedy: When True every arm starts with an estimate of 0;
                when False every arm starts with an estimate of 1.
        """
        self.count = np.zeros(number_of_bandits)
        # Float dtype is required: an integer array would silently truncate
        # the fractional running averages written by ``update``.
        self.scores = np.full(number_of_bandits, float(not start_greedy))
        self.epsilon = epsilon
        self.bandit_count = number_of_bandits

    def select_arm(self):
        """Return the index of the arm to pull next.

        Returns:
            An int in ``range(self.bandit_count)``.
        """
        explore = np.random.binomial(1, self.epsilon) == 1
        if explore:
            return int(np.random.randint(self.bandit_count))
        # argmax returns the first index on ties.
        return int(self.scores.argmax())

    def update(self, bandit_index, reward):
        """Fold ``reward`` into the running average of one arm.

        Args:
            bandit_index: Index of the arm that was pulled.
            reward: Reward observed for that pull.
        """
        self.count[bandit_index] += 1
        n = float(self.count[bandit_index])
        previous = self.scores[bandit_index]
        # Incremental mean: the old estimate is weighted by (n - 1) / n.
        # On the first update that weight is 0, so the initial estimate is
        # replaced by the observed reward.
        self.scores[bandit_index] = ((n - 1) / n) * previous + (1 / n) * reward
class Binomial_Bandit:
    """A bandit arm that pays 1 with a fixed probability and 0 otherwise.

    Attributes:
        probability: Chance that a single pull yields a reward of 1.
    """

    def __init__(self, p_of_payoff):
        """Store the payoff probability.

        Args:
            p_of_payoff: Probability of a payoff, expected to lie in [0, 1].
        """
        self.probability = p_of_payoff

    def get_reward(self):
        """Draw one reward from a single-trial binomial distribution.

        Returns:
            1 with probability ``self.probability``, else 0.
        """
        # The sampler lives in np.random; there is no top-level np.binomial.
        return np.random.binomial(1, self.probability)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment