Skip to content

Instantly share code, notes, and snippets.

@lincerely
Last active April 25, 2018 16:09
Show Gist options
  • Save lincerely/059418660fa467229965d7964fa37c49 to your computer and use it in GitHub Desktop.
Implemented the example from "A Painless Q-Learning Tutorial" (http://mnemstudio.org/path-finding-q-learning-tutorial.htm) and visualized it.
#!/usr/bin/env python
# http://mnemstudio.org/path-finding-q-learning-tutorial.htm
# tested on python and python3
import numpy as np
import os

# Reward matrix R[state, action]: -1 marks an invalid transition,
# 0 a valid move, and 100 a move into the goal state (state 5).
R = np.array([[-1, -1, -1, -1,  0,  -1],
              [-1, -1, -1,  0, -1, 100],
              [-1, -1, -1,  0, -1,  -1],
              [-1,  0,  0, -1,  0,  -1],
              [ 0, -1, -1,  0, -1, 100],
              [-1,  0, -1, -1,  0, 100]]).astype("float16")

# Q-value table, same shape/dtype as R, initialized to all zeros.
Q = np.zeros_like(R)

gamma = 0.8      # discount factor for future rewards
max_iter = 1000  # number of training episodes

# Derive the state count from R instead of hard-coding 6, so editing R
# is enough to change the world size.
num_states = R.shape[0]
actions = np.arange(num_states)
# --- training ---
for n in range(max_iter):
    isGoal = False
    # start each episode in a random initial state
    state = np.random.randint(num_states)
    while not isGoal:
        # actions with a non-negative reward are the valid moves here
        valid_moves = R[state] >= 0
        valid_actions = actions[valid_moves]
        action = int(np.random.choice(valid_actions))
        # in this world the action index IS the next state
        next_state = action
        # Q-learning update: immediate reward + discounted best future value
        Q[state, action] = R[state, action] + gamma * np.max(Q[next_state])
        # comment out the following 4 lines to skip to the result directly
        print(np.rint(Q))
        print('current state: ', state)
        print('iterations:', n)
        os.system('cls' if os.name == 'nt' else 'clear')
        if next_state == 5:
            isGoal = True
        state = next_state
# --- end of training ---

# display the final (rounded) Q table and a 0-100 normalized copy
print('\nTrained Q: ')
Q = np.rint(Q)
print(Q)
nQ = Q / np.max(Q) * 100
print('\nnormalized Q: ')
print(np.rint(nQ))
# --- testing: greedy walk from state 2 to the goal (state 5) ---
state = 2
steps = [state]
while state != 5:
    # all actions tied for the best Q-value in the current state;
    # a fresh name avoids clobbering the global `actions` array
    best_actions = np.where(Q[state] == np.max(Q[state]))[0]
    # np.random.choice handles both the single-candidate and the
    # tie-breaking case, so no size check is needed
    action = int(np.random.choice(best_actions))
    steps.append(action)
    state = action
print('\nFrom 2 to 5')
print(steps)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment