Skip to content

Instantly share code, notes, and snippets.

@qfettes
Created July 7, 2017 03:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qfettes/cedda58cfac144633ed739b66e90b275 to your computer and use it in GitHub Desktop.
Save qfettes/cedda58cfac144633ed739b66e90b275 to your computer and use it in GitHub Desktop.
import numpy as np
import scipy
import gym
from collections import defaultdict
from gym import wrappers
# Discount factor applied to the bootstrapped next-state value in the Q update.
GAMMA = 0.9
# Step size (learning rate) used for every Q update.
ALPHA = 0.1
# Number of training episodes run by the main loop.
NUM_EPISODES = 500000
# Initial exploration rate; handed to random_action as its `eps` argument.
EPSILON = 1.0
# Multiplicative decay applied to the exploration rate at the end of each episode.
EPSILON_DECAY = 0.9999
def random_action(a, env, eps=0.1):
    """Epsilon-greedy selection: keep `a` or swap it for a random action.

    A single uniform draw from ``np.random.random()`` is compared with
    ``1 - eps``. When the draw is below that threshold the supplied action
    `a` is returned unchanged; otherwise ``env.action_space.sample()`` is
    returned instead.

    Args:
        a: The action to return when not exploring.
        env: Object exposing ``action_space.sample()``.
        eps: Exploration threshold; larger values explore more often.

    Returns:
        Either `a` or a freshly sampled action.
    """
    draw = np.random.random()
    keep_given_action = draw < (1 - eps)
    # The conditional expression is lazy, so sample() is only called when
    # the given action is not kept.
    return a if keep_given_action else env.action_space.sample()
def play_game(env, epsilon):
    """Placeholder with no implementation; does nothing and returns None.

    Neither `env` nor `epsilon` is used.
    """
    return None
if __name__ == '__main__':
    # Tabular Q-learning on FrozenLake with epsilon-greedy exploration and
    # hand-shaped rewards. Uses only constructs that are valid on both
    # Python 2 and Python 3 (range instead of xrange, print via a single
    # parenthesised %-formatted string).
    env = gym.make('FrozenLake-v0')
    env = wrappers.Monitor(env, 'frozenlake-experiment-Q1', force=True)

    n_states = env.observation_space.n
    n_actions = env.action_space.n

    # Q[s, a]: current action-value estimate for every (state, action) pair.
    Q = np.zeros((n_states, n_actions))
    # How many times each Q[s, a] entry has been updated. Not read by the
    # constant-ALPHA update below; kept so a count-based step size such as
    # ALPHA / (update_counts[s, a] + 1) can be switched in.
    update_counts = np.zeros((n_states, n_actions))
    # Largest absolute Q change observed in each episode.
    deltas = []
    # Running mean of the last (shaped) reward of each episode.
    average_return = 0
    # Work on a local copy so the module-level EPSILON constant stays intact.
    epsilon = EPSILON

    try:
        for episode in range(NUM_EPISODES):
            s = env.reset()
            biggest_change = 0
            done = False

            while not done:
                # Greedy action w.r.t. the current estimates, possibly
                # replaced by a random one for exploration.
                greedy = np.argmax(Q[s, :])
                a = random_action(greedy, env, epsilon)
                old_qsa = Q[s, a]

                s2, r, done, _ = env.step(a)

                # Reward shaping: a small penalty when the state did not
                # change, and -1 when the episode ended with zero reward.
                # NOTE(review): an episode cut off by a step limit would
                # also hit the second branch - confirm that is intended.
                if s2 == s:
                    r -= 0.01
                elif done and r == 0:
                    r = -1.0

                # Count each update exactly once.
                update_counts[s, a] += 1

                # Q-learning target bootstraps from the best next action.
                max_next = np.max(Q[s2, :])
                Q[s, a] = old_qsa + ALPHA * (r + GAMMA * max_next - old_qsa)
                biggest_change = max(biggest_change,
                                     np.abs(old_qsa - Q[s, a]))

                s = s2

            # End-of-episode bookkeeping; `r` is the final shaped reward.
            deltas.append(biggest_change)
            average_return += (r - average_return) / (episode + 1)
            epsilon *= EPSILON_DECAY

            if (episode + 1) % 10000 == 0:
                # '%s %s' reproduces the label + space + value layout of a
                # two-argument print on either Python version.
                print('%s %s' % ('Episode: ', episode + 1))
                print('%s %s' % ('Epsilon: ', epsilon))
                print('%s %s' % ('Average Return: ', average_return))
    finally:
        # Always close the monitored environment, even if training is
        # interrupted or raises.
        env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment