Last active
April 25, 2018 16:09
-
-
Save lincerely/059418660fa467229965d7964fa37c49 to your computer and use it in GitHub Desktop.
Implementation of the example from "A Painless Q-Learning Tutorial" (http://mnemstudio.org/path-finding-q-learning-tutorial.htm), with a step-by-step visualization of the Q matrix during training.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#http://mnemstudio.org/path-finding-q-learning-tutorial.htm | |
#tested on python and python3 | |
import numpy as np | |
import os | |
R = np.array([[-1, -1, -1, -1, 0, -1], | |
[-1, -1, -1, 0, -1, 100], | |
[-1, -1, -1, 0, -1, -1], | |
[-1, 0, 0, -1, 0, -1], | |
[0, -1, -1, 0, -1, 100], | |
[-1, 0, -1, -1, 0, 100]]).astype("float16") | |
Q = np.zeros_like(R) | |
gamma = 0.8 | |
max_iter = 1000 | |
num_states = 6 | |
actions = np.arange(6) | |
#start training | |
for n in range(int(max_iter)): | |
isGoal = False | |
#select a random inital state | |
state = np.random.randint(num_states); | |
while not isGoal: | |
valid_moves = R[state] >= 0 | |
valid_actions = actions[valid_moves == True] | |
action = int(np.random.choice(valid_actions,size=1)) | |
next_state = action | |
Q[state,action] = R[state,action] + gamma * max(Q[next_state,:]) | |
#comment out the following 4 lines to skip to the result directly | |
print(np.rint(Q)) | |
print('current state: ',state) | |
print('iterations:', n) | |
os.system('cls' if os.name == 'nt' else 'clear') | |
if next_state == 5: | |
isGoal = True | |
state = next_state | |
#end of training | |
#display final Q | |
print('\nTrained Q: ') | |
Q =np.rint(Q) | |
print(Q) | |
nQ = Q/np.max(Q)*100 | |
print('\nnormalized Q: ') | |
print(np.rint(nQ)) | |
#testing | |
state = 2 | |
steps = []; | |
steps.append(state) | |
while state != 5: | |
actions = np.where(Q[state]== np.max(Q[state]))[0] | |
if actions.shape[0] >1: | |
action = int(np.random.choice(actions,size=1)) | |
else: | |
action = int(actions) | |
steps.append(action) | |
state = action | |
print('\nFrom 2 to 5') | |
print(steps) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment