Skip to content

Instantly share code, notes, and snippets.

@stober
Created May 14, 2011 00:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stober/971547 to your computer and use it in GitHub Desktop.
Epsilon Greedy N-Armed Bandit Solver
# J. Stober
# May 13, 2011
import numpy as np
import numpy.random as nr
class EGreedy(object):
    """Epsilon-greedy solver for the n-armed bandit problem.

    The agent keeps a running average of the reward observed for each
    action. When asked for an action it picks a uniformly random one with
    probability ``epsilon`` (exploration) and otherwise the action with the
    highest average so far (exploitation).

    Attributes are plain lists indexed by action number:
        nactions -- number of available actions (arms)
        epsilon  -- exploration probability compared against ``nr.rand()``
        averages -- running mean reward per action, all starting at 0.0
        counts   -- number of rewards recorded per action
    """

    def __init__(self, k=10, epsilon=0.1):
        """Create a solver for ``k`` actions.

        Args:
            k: Number of actions; must be at least 1.
            epsilon: Probability of taking a random action. Values <= 0
                never explore and values >= 1 always explore.

        Raises:
            ValueError: If ``k`` is smaller than 1 (there would be no
                action to choose from).
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))
        self.nactions = k
        self.epsilon = epsilon
        self.averages = [0.0] * k
        self.counts = [0] * k

    def action(self):
        """Return the index of the next action to take, as an ``int``.

        Ties between equally good actions are resolved in favour of the
        lowest index, because ``np.argmax`` returns the first maximum.
        """
        if nr.rand() < self.epsilon:
            # Explore: the upper bound of randint is exclusive.
            return int(nr.randint(0, self.nactions))
        # Exploit: only computed when actually needed.
        return int(np.argmax(self.averages))

    def update(self, a, r):
        """Record reward ``r`` for action ``a``.

        Args:
            a: Index of the action that was taken.
            r: Numeric reward received for that action.
        """
        n = self.counts[a] + 1
        mean = float(self.averages[a])
        # Incremental form of the cumulative average (r + c * p) / (c + 1).
        # State is written only after the arithmetic succeeded, so a bad
        # reward value leaves counts and averages consistent.
        self.averages[a] = mean + (r - mean) / float(n)
        self.counts[a] = n

    def train(self, env, nsteps=1000):
        """Interact with ``env`` for ``nsteps`` steps, learning as it goes.

        Args:
            env: Object exposing ``run(a)``, which is called with the chosen
                action index and whose return value is used as the reward.
            nsteps: Number of action/reward steps to perform.
        """
        for _ in range(nsteps):
            a = self.action()
            r = env.run(a)  # the environment
            self.update(a, r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment