qlearning.py, forked from suhailgupta03/qlearning.py by @athergeek (created November 23, 2018)
import numpy as np
import pandas as pd
import random as rand
world_df = pd.read_csv('world01.csv',
                       names=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Convert the world DataFrame into a 2-D NumPy array
world = world_df.values
print(world)
number_of_states = len(world[0]) * len(world[:, 0])
number_of_actions = 4
# Create a Qtable
qtable = np.random.rand(number_of_states, number_of_actions)
print("Qtable at the beginning")
print(qtable)
start_pos_ix = 4
start_pos_iy = 9
# 0 : blank space
# 1 : obstacle
# 2 : starting location for the robot
# 3 : goal location
# 5 : quicksand
# Objective : learn how to navigate from the starting location to the goal with the highest total reward
# Reward:
# -1 : if the robot moves to an empty or blank space, or attempts to move into a wall
# -100 : if the robot moves to a quicksand space
# 1 : if the robot moves to the goal space
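# Not part of the original gist: a minimal sketch of how the start and goal
# cells could be read from the grid codes above (2 = start, 3 = goal) instead
# of hard-coding start_pos_ix / start_pos_iy. np.argwhere is a real NumPy
# call; whether world01.csv actually marks the start with a 2 at (9, 4) is an
# assumption, so the lines are left commented out.
# start_pos_iy, start_pos_ix = np.argwhere(world == 2)[0]
# goal_pos_iy, goal_pos_ix = np.argwhere(world == 3)[0]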
# Learn the Qtable
max_iterations = 50000
learning_rate = 0.2
gamma = 0.9
random_action_decay_rate = 0.99
def prepare_qtable():
    random_action_rate = 0.97
    start_state_tuple = (start_pos_iy, start_pos_ix)
    # Encode a (row, column) position as a single state index; for this
    # 10x10 world the digit concatenation equals row * 10 + column.
    start_state = int(str(start_pos_iy) + str(start_pos_ix))
    s = start_state
    pos_y = start_pos_iy
    pos_x = start_pos_ix
    goal_reached = 0
    absorbed = 0
    for _ in range(max_iterations):
        action = act(random_action_rate, s)
        prev_state = (pos_y, pos_x)
        new_state = prev_state
        if action == 0:
            # move right
            new_state = (pos_y, pos_x + 1)
        elif action == 1:
            # move up
            new_state = (pos_y - 1, pos_x)
        elif action == 2:
            # move left
            new_state = (pos_y, pos_x - 1)
        elif action == 3:
            # move down
            new_state = (pos_y + 1, pos_x)
        if new_state[1] < 0:
            # encode an off-grid column as a negative index so it is rejected below
            s_prime = -1 * int(str(new_state[0]) + str(-1 * new_state[1]))
        else:
            s_prime = int(str(new_state[0]) + str(new_state[1]))
        if (s_prime < 0 or new_state[0] > 9 or new_state[1] > 9
                or world[new_state[0]][new_state[1]] == 1):
            # if the agent tries to go beyond the environment or moves into a wall,
            # give it a reward of -1 and keep it in its old state
            new_state = prev_state
            s_prime = s
            reward = -1
        elif world[new_state[0]][new_state[1]] == 5:
            # if the agent moves into quicksand, end the episode with a large penalty
            reward = -100
            new_state = start_state_tuple
            absorbed += 1
        elif world[new_state[0]][new_state[1]] == 3:
            # if the agent reaches the goal (cell value 3), end the episode with a
            # positive reward
            reward = 1
            new_state = start_state_tuple
            goal_reached += 1
        else:
            reward = -1
        update_qtable(s, action, s_prime, reward)
        pos_y = new_state[0]
        pos_x = new_state[1]
        # re-derive the state index from the (possibly reset) position so that
        # s matches pos_y / pos_x at the start of the next step
        s = int(str(pos_y) + str(pos_x))
        random_action_rate = random_action_rate * random_action_decay_rate
    print("Number of times agent reached the goal during the training: {}".format(goal_reached))
    print("Number of times agent was absorbed during the training: {}".format(absorbed))
def update_qtable(s, a, s_prime, r):
    # Standard tabular Q-learning update:
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    qtable[s, a] = (1.0 - learning_rate) * qtable[s, a] + learning_rate * \
        (r + gamma * np.max(qtable[s_prime]))
def act(rar, state):
    flip = rand.random()
    if flip < rar:
        # Take a random step
        action = rand.randint(0, number_of_actions - 1)
    else:
        # Take a calculated step
        action = np.argmax(qtable[state])
    return action
if __name__ == "__main__":
    prepare_qtable()
    print("Qtable after {} iterations".format(max_iterations))
    print(qtable)
    print("Saving qtable as qtable.csv")
    pd.DataFrame(data=qtable).to_csv('qtable.csv')
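    # Optional usage of the sketch above (not in the original gist): print the
    # greedy rollout from the hard-coded start cell.
    print(greedy_path(start_pos_iy, start_pos_ix))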