Following a simple Q-learning tutorial
# Following along with the sample from this page
# https://www.oreilly.com/learning/introduction-to-reinforcement-learning-and-openai-gym
# with a few modifications to look at what's going on under the hood.
import gym
import numpy as np
import matplotlib.pyplot as plt

env = gym.make("Taxi-v2")
env.reset()
history = list()  # total reward of each episode, for plotting at the end

# One Q-value per (state, action) pair, all initialized to zero
Q = np.zeros([env.observation_space.n, env.action_space.n])
alpha = 0.618  # Learning rate; don't understand where this specific value came from yet
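
# The update inside the training loop below is the standard one-step
# Q-learning rule with an implicit discount factor of 1:
#   Q(s,a) <- Q(s,a) + alpha * (reward + max_a' Q(s',a') - Q(s,a))
# i.e. nudge Q(s,a) toward the reward just received plus the best value
# currently estimated for the next state.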
for episode in range(1, 1001):
    done = False
    G, reward = 0, 0
    state = env.reset()
    while not done:
        action = np.argmax(Q[state])  # always take the current best-known action
        state2, reward, done, info = env.step(action)
        Q[state, action] += alpha * (reward + np.max(Q[state2]) - Q[state, action])
        G += reward
        state = state2
    if episode % 50 == 0:
        print('Episode {} Total Reward: {}'.format(episode, G))
    history.append(G)
    # print(np.count_nonzero(Q))

plt.plot(history)
plt.show()
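
To sanity-check the result, here is a minimal sketch (not part of the original gist) that replays one episode greedily from the learned table, assuming the same old Gym API used above, where env.step() returns a 4-tuple and env.render() prints the taxi grid to the console:

state = env.reset()
done = False
total = 0
while not done:
    env.render()                  # print the current taxi grid
    action = np.argmax(Q[state])  # greedy action from the learned Q-table
    state, reward, done, info = env.step(action)
    total += reward
print('Greedy rollout total reward: {}'.format(total))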