Created
January 29, 2021 09:52
-
-
Save dharma6872/c1791d1799578a3a1f7cc15a10fed758 to your computer and use it in GitHub Desktop.
[Lecture 5 Q learning Nondeterministic] #강화학습
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""Lecture 5 Q learning Nondeterministic"""
import random

import gym
import matplotlib.pyplot as plt
import numpy as np

# FrozenLake environment shared by every experiment in this script.
env = gym.make("FrozenLake-v0")

# Tabular action-value estimates: one row per state, one column per action,
# all starting at zero.
Q = np.zeros((env.observation_space.n, env.action_space.n))

# Hyper-parameters.
learning_rate = 0.85  # weight given to the new estimate in the blended update
dis = 0.99            # discount factor applied to the best next-state value
num_episodes = 2000   # number of training episodes per experiment
"""### 미끄러지는 환경에서 학습""" | |
# Experiment 1: Q-learning where every update overwrites the old estimate
# with `reward + dis * max(Q[new_state])` (no learning rate involved).
rList = []  # total reward collected in each episode
for i in range(num_episodes):
    state = env.reset()
    rAll = 0

    while True:
        # Greedy choice perturbed by uniform noise; the noise is divided by
        # the episode number, so exploration fades as training progresses.
        noise = np.random.rand(1, env.action_space.n) / (i + 1)
        action = np.argmax(Q[state, :] + noise)

        new_state, reward, done, _ = env.step(action)

        # Replace the stored value outright with the one-step target.
        best_next_value = np.max(Q[new_state, :])
        Q[state, action] = reward + dis * best_next_value

        rAll += reward
        state = new_state
        if done:
            break

    rList.append(rAll)

average_reward = sum(rList) / num_episodes
print("Score over time: " + str(average_reward))
print("Final Q-table Value")
print(Q)

# One bar per episode, with height equal to that episode's total reward.
episode_indices = range(len(rList))
plt.bar(episode_indices, rList, color="blue")
plt.show()
"""### 미끄러지는 경우 반영""" | |
# Experiment 2: Q-learning with a learning rate. Each update blends the old
# estimate with the new one-step target instead of overwriting it.
#
# Start from a fresh table: without this reset the run would begin from the
# values left behind by the previous training loop, so the reported score and
# the final table would not reflect this update rule alone.
Q = np.zeros([env.observation_space.n, env.action_space.n])

rList = []  # total reward collected in each episode
for i in range(num_episodes):
    state = env.reset()
    rAll = 0
    done = False
    while not done:
        # Greedy choice perturbed by uniform noise; the noise is divided by
        # the episode number, so exploration fades as training progresses.
        action = np.argmax(Q[state, :] + np.random.rand(1, env.action_space.n) / (i + 1))
        new_state, reward, done, _ = env.step(action)

        # Move the stored value only part of the way (learning_rate) toward
        # the one-step target, keeping (1 - learning_rate) of the old value.
        target = reward + dis * np.max(Q[new_state, :])
        Q[state, action] = (1 - learning_rate) * Q[state, action] + learning_rate * target

        state = new_state
        rAll += reward
    rList.append(rAll)

print("Score over time: " + str(sum(rList) / num_episodes))
print("Final Q-table Value")
print(Q)

# One bar per episode, with height equal to that episode's total reward.
plt.bar(range(len(rList)), rList, color="blue")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment