Q-Star Reinforcement Learning for Grid Navigation
import numpy as np

# Define constants for the grid world
GRID_SIZE = 5
NUM_ACTIONS = 4
NUM_EPISODES = 500
MAX_STEPS_PER_EPISODE = 50

# Define states (positions on the grid)
STATES = [(i, j) for i in range(GRID_SIZE) for j in range(GRID_SIZE)]

# Define actions (possible movements)
ACTIONS = [0, 1, 2, 3]  # 0: up, 1: down, 2: left, 3: right

# Initialize Q-values for each state-action pair
initial_q_values = np.zeros((GRID_SIZE, GRID_SIZE, NUM_ACTIONS))
# Reward for a given state-action pair: positive at the goal,
# negative on obstacles, zero otherwise
def get_reward(state, action):
    next_state = get_next_state(state, action)
    if next_state == (GRID_SIZE - 1, GRID_SIZE - 1):  # Goal state
        return 10
    elif next_state in [(1, 1), (2, 2), (3, 3)]:  # Obstacle states
        return -5
    else:
        return 0
# Next state for a given state-action pair; moves are clipped at the grid edges
def get_next_state(state, action):
    i, j = state
    if action == 0:  # up
        return max(i - 1, 0), j
    elif action == 1:  # down
        return min(i + 1, GRID_SIZE - 1), j
    elif action == 2:  # left
        return i, max(j - 1, 0)
    else:  # right
        return i, min(j + 1, GRID_SIZE - 1)
# Check whether a state is the terminal (goal) state
def is_terminal_state(state):
    return state == (GRID_SIZE - 1, GRID_SIZE - 1)
# Check whether the maximum number of steps has been reached
def max_steps_reached():
    global step_count
    step_count += 1
    return step_count >= MAX_STEPS_PER_EPISODE
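# Note: max_steps_reached() advances the global step counter as a side effect,
# so it must be called exactly once per environment step; calling it anywhere
# else would silently shorten the episode.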
# Q-star Reinforcement Learning
def q_star_reinforcement_learning(S, A, initial_q_values, alpha, gamma, epsilon, max_episodes):
    global step_count
    Q = np.copy(initial_q_values)
    for episode in range(1, max_episodes + 1):
        step_count = 0  # Reset step count for each episode
        current_state = (0, 0)  # Start from the top-left corner
        while True:
            # Epsilon-greedy action selection; the greedy branch adds
            # exploration noise that decays as training progresses
            if np.random.rand() < epsilon:
                current_action = np.random.choice(A)
            else:
                current_action = np.argmax(
                    Q[current_state[0], current_state[1], :]
                    + np.random.randn(NUM_ACTIONS) * (1.0 / (episode + 1))
                )
            reward = get_reward(current_state, current_action)
            next_state = get_next_state(current_state, current_action)
            # Temporal-difference update toward reward + discounted best next value
            Q[current_state[0], current_state[1], current_action] += alpha * (
                reward
                + gamma * np.max(Q[next_state[0], next_state[1], :])
                - Q[current_state[0], current_state[1], current_action]
            )
            current_state = next_state
            if is_terminal_state(current_state) or max_steps_reached():
                break
    return Q  # Return the trained Q-values
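# For reference, the update implemented above is the standard tabular
# Q-learning rule,
#   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)),
# applied here with epsilon-greedy selection plus decaying exploration noise.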
# Main program
step_count = 0  # Initialize the global step counter used by max_steps_reached()
trained_q_values = q_star_reinforcement_learning(
    STATES, ACTIONS, initial_q_values,
    alpha=0.1, gamma=0.9, epsilon=0.2, max_episodes=NUM_EPISODES,
)
# Use the trained Q-values to navigate through the grid world,
# capping the rollout so an untrained policy cannot loop forever
current_state = (0, 0)
for _ in range(MAX_STEPS_PER_EPISODE):
    if is_terminal_state(current_state):
        break
    action = np.argmax(trained_q_values[current_state[0], current_state[1], :])
    print(f"Move {action}: {current_state} -> {get_next_state(current_state, action)}")
    current_state = get_next_state(current_state, action)
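# As a quick sanity check, a minimal sketch (not part of the original gist;
# print_greedy_policy is a hypothetical helper that assumes trained_q_values
# and the definitions above are in scope) rendering the greedy policy as one
# arrow per grid cell:
ARROWS = {0: "^", 1: "v", 2: "<", 3: ">"}

def print_greedy_policy(q_values):
    for i in range(GRID_SIZE):
        row = []
        for j in range(GRID_SIZE):
            if (i, j) == (GRID_SIZE - 1, GRID_SIZE - 1):
                row.append("G")  # goal cell
            else:
                row.append(ARROWS[int(np.argmax(q_values[i, j, :]))])
        print(" ".join(row))

print_greedy_policy(trained_q_values)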