qlearning.py, forked from suhailgupta03/qlearning.py by @athergeek (created November 23, 2018)
import numpy as np
import pandas as pd
import random as rand
world_df = pd.read_csv('world01.csv',
                       names=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Convert the world DataFrame into a 2-D NumPy array
world = world_df.values
print(world)
number_of_states = len(world[0]) * len(world[:, 0])
number_of_actions = 4
# Create a Qtable
qtable = np.random.rand(number_of_states, number_of_actions)
print("Qtable at the beginning")
print(qtable)
start_pos_ix = 4
start_pos_iy = 9
# 0 : blank space
# 1 : obstacle
# 2 : starting location for the robot
# 3 : goal location
# 5 : quicksand
# Objective : learn how to navigate from the starting location to the goal with the highest total reward
# Reward:
# -1 : if the robot moves to an empty or blank space, or attempts to move into a wall
# -100 : if the robot moves to a quicksand space
# 1 : if the robot moves to the goal space
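# Not part of the original gist: a minimal sketch of how the start and goal
# cells could be read from the grid codes above (2 = start, 3 = goal) instead
# of hard-coding start_pos_ix / start_pos_iy. np.argwhere is a real NumPy
# call; whether world01.csv actually marks the start with a 2 at (9, 4) is an
# assumption, so the lines are left commented out.
# start_pos_iy, start_pos_ix = np.argwhere(world == 2)[0]
# goal_pos_iy, goal_pos_ix = np.argwhere(world == 3)[0]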
# Learn the Qtable
max_iterations = 50000
learning_rate = 0.2
gamma = 0.9
random_action_decay_rate = 0.99
def prepare_qtable():
    random_action_rate = 0.97
    start_state_tuple = (start_pos_iy, start_pos_ix)
    # Encode a (row, column) position as a single state index; for this
    # 10x10 world the digit concatenation equals row * 10 + column.
    start_state = int(str(start_pos_iy) + str(start_pos_ix))
    s = start_state
    pos_y = start_pos_iy
    pos_x = start_pos_ix
    goal_reached = 0
    absorbed = 0
    for _ in range(max_iterations):
        action = act(random_action_rate, s)
        prev_state = (pos_y, pos_x)
        new_state = prev_state
        if action == 0:
            # move right
            new_state = (pos_y, pos_x + 1)
        elif action == 1:
            # move up
            new_state = (pos_y - 1, pos_x)
        elif action == 2:
            # move left
            new_state = (pos_y, pos_x - 1)
        elif action == 3:
            # move down
            new_state = (pos_y + 1, pos_x)
        if new_state[1] < 0:
            # encode an off-grid column as a negative index so it is rejected below
            s_prime = -1 * int(str(new_state[0]) + str(-1 * new_state[1]))
        else:
            s_prime = int(str(new_state[0]) + str(new_state[1]))
        if (s_prime < 0 or new_state[0] > 9 or new_state[1] > 9
                or world[new_state[0]][new_state[1]] == 1):
            # if the agent tries to go beyond the environment or moves into a wall,
            # give it a reward of -1 and keep it in its old state
            new_state = prev_state
            s_prime = s
            reward = -1
        elif world[new_state[0]][new_state[1]] == 5:
            # if the agent moves into quicksand, end the episode with a large penalty
            reward = -100
            new_state = start_state_tuple
            absorbed += 1
        elif world[new_state[0]][new_state[1]] == 3:
            # if the agent reaches the goal (cell value 3), end the episode with a
            # positive reward
            reward = 1
            new_state = start_state_tuple
            goal_reached += 1
        else:
            reward = -1
        update_qtable(s, action, s_prime, reward)
        pos_y = new_state[0]
        pos_x = new_state[1]
        # re-derive the state index from the (possibly reset) position so that
        # s matches pos_y / pos_x at the start of the next step
        s = int(str(pos_y) + str(pos_x))
        random_action_rate = random_action_rate * random_action_decay_rate
    print("Number of times agent reached the goal during the training: {}".format(goal_reached))
    print("Number of times agent was absorbed during the training: {}".format(absorbed))
def update_qtable(s, a, s_prime, r):
    # Standard tabular Q-learning update:
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    qtable[s, a] = (1.0 - learning_rate) * qtable[s, a] + learning_rate * \
        (r + gamma * np.max(qtable[s_prime]))
def act(rar, state):
    flip = rand.random()
    if flip < rar:
        # Take a random step
        action = rand.randint(0, number_of_actions - 1)
    else:
        # Take a calculated step
        action = np.argmax(qtable[state])
    return action
if __name__ == "__main__":
    prepare_qtable()
    print("Qtable after {} iterations".format(max_iterations))
    print(qtable)
    print("Saving qtable as qtable.csv")
    pd.DataFrame(data=qtable).to_csv('qtable.csv')
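    # Optional usage of the sketch above (not in the original gist): print the
    # greedy rollout from the hard-coded start cell.
    print(greedy_path(start_pos_iy, start_pos_ix))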