# caffeine-potent/EpsilonGreedy.py

## EpsilonGreedy.py
import numpy as np
class EpsGreedy:
    """Epsilon-greedy agent for a multi-armed bandit.

    On each selection the agent explores (picks a uniformly random arm) with
    probability ``epsilon``; otherwise it exploits the arm whose current
    reward estimate is highest.

    Attributes:
        count: Float array; ``count[i]`` is the number of rewards recorded
            for arm ``i`` through ``update``.
        scores: Float array; ``scores[i]`` is the current reward estimate
            for arm ``i``.
        epsilon: Probability of exploring on a given selection.
        bandit_count: Number of arms.
    """

    def __init__(self, number_of_bandits, epsilon, start_greedy=True):
        """Create an agent that has not observed any reward yet.

        Args:
            number_of_bandits: Number of arms to choose between.
            epsilon: Exploration probability. It is handed to
                ``np.random.binomial`` as ``p``, so it must lie in [0, 1].
            start_greedy: When True every estimate starts at 0.0; when
                False every estimate starts at 1.0.
        """
        self.count = np.zeros(number_of_bandits)
        # Float storage is required: update() writes fractional averages,
        # which an integer array would silently truncate.
        self.scores = np.full(number_of_bandits, float(not start_greedy))
        self.epsilon = epsilon
        self.bandit_count = number_of_bandits

    def select_arm(self):
        """Return the index of the arm to pull next.

        Returns:
            A uniformly random index in ``[0, bandit_count)`` with
            probability ``epsilon``; otherwise the index of the highest
            estimate (the lowest such index when several arms tie).
        """
        explore = np.random.binomial(1, self.epsilon) == 1
        if explore:
            return int(np.random.randint(self.bandit_count))
        return int(self.scores.argmax())

    def update(self, bandit_index, reward):
        """Fold one observed reward into the estimate for an arm.

        The estimate is a running mean. The first update of an arm replaces
        its initial estimate entirely, because the old value is weighted
        by ``(n - 1) / n == 0``.

        Args:
            bandit_index: Index of the arm that produced the reward.
            reward: Observed reward.
        """
        self.count[bandit_index] += 1
        n = float(self.count[bandit_index])
        previous = self.scores[bandit_index]
        self.scores[bandit_index] = ((n - 1) / n) * previous + reward / n

class Binomial_Bandit:
    """Bandit arm that pays 1 with a fixed probability and 0 otherwise.

    Attributes:
        probability: Chance that a single pull pays out.
    """

    def __init__(self, p_of_payoff):
        """Store the payoff probability.

        Args:
            p_of_payoff: Probability of a reward of 1 on each pull. It is
                handed to ``np.random.binomial`` as ``p``, so it must lie
                in [0, 1].
        """
        self.probability = p_of_payoff

    def get_reward(self):
        """Pull the arm once.

        Returns:
            1 with probability ``self.probability``, otherwise 0.
        """
        # The sampler lives in numpy.random; numpy has no top-level binomial.
        return np.random.binomial(1, self.probability)
	import numpy as np
	class EpsGreedy:
		"""Epsilon-greedy agent for a multi-armed bandit.

		On each selection the agent explores (picks a uniformly random arm)
		with probability ``epsilon``; otherwise it exploits the arm whose
		current reward estimate is highest.

		Attributes:
			count: Float array; ``count[i]`` is the number of rewards
				recorded for arm ``i`` through ``update``.
			scores: Float array; ``scores[i]`` is the current reward
				estimate for arm ``i``.
			epsilon: Probability of exploring on a given selection.
			bandit_count: Number of arms.
		"""

		def __init__(self, number_of_bandits, epsilon, start_greedy=True):
			"""Create an agent that has not observed any reward yet.

			Args:
				number_of_bandits: Number of arms to choose between.
				epsilon: Exploration probability. It is handed to
					``np.random.binomial`` as ``p``, so it must lie in [0, 1].
				start_greedy: When True every estimate starts at 0.0; when
					False every estimate starts at 1.0.
			"""
			self.count = np.zeros(number_of_bandits)
			# Float storage is required: update() writes fractional averages,
			# which an integer array would silently truncate.
			self.scores = np.full(number_of_bandits, float(not start_greedy))
			self.epsilon = epsilon
			self.bandit_count = number_of_bandits

		def select_arm(self):
			"""Return the index of the arm to pull next.

			Returns:
				A uniformly random index in ``[0, bandit_count)`` with
				probability ``epsilon``; otherwise the index of the highest
				estimate (the lowest such index when several arms tie).
			"""
			explore = np.random.binomial(1, self.epsilon) == 1
			if explore:
				return int(np.random.randint(self.bandit_count))
			return int(self.scores.argmax())

		def update(self, bandit_index, reward):
			"""Fold one observed reward into the estimate for an arm.

			The estimate is a running mean. The first update of an arm
			replaces its initial estimate entirely, because the old value is
			weighted by ``(n - 1) / n == 0``.

			Args:
				bandit_index: Index of the arm that produced the reward.
				reward: Observed reward.
			"""
			self.count[bandit_index] += 1
			n = float(self.count[bandit_index])
			previous = self.scores[bandit_index]
			self.scores[bandit_index] = ((n - 1) / n) * previous + reward / n

	class Binomial_Bandit:
		"""Bandit arm that pays 1 with a fixed probability and 0 otherwise.

		Attributes:
			probability: Chance that a single pull pays out.
		"""

		def __init__(self, p_of_payoff):
			"""Store the payoff probability.

			Args:
				p_of_payoff: Probability of a reward of 1 on each pull. It is
					handed to ``np.random.binomial`` as ``p``, so it must lie
					in [0, 1].
			"""
			self.probability = p_of_payoff

		def get_reward(self):
			"""Pull the arm once.

			Returns:
				1 with probability ``self.probability``, otherwise 0.
			"""
			# The sampler lives in numpy.random; numpy has no top-level binomial.
			return np.random.binomial(1, self.probability)