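# Setup (not in the original gist): a minimal sketch of the imports and
# variables the loop below assumes. The environment name 'FrozenLake-v0' and
# all hyperparameter values here are illustrative assumptions, not the
# author's original choices.
import gym
import numpy as np

env = gym.make('FrozenLake-v0')  # Assumed discrete-state Gym environment
q_table = np.zeros((env.observation_space.n, env.action_space.n))  # Q values, initialised to zero
num_episodes = 10000       # Assumed number of training episodes
steps_per_episode = 100    # Assumed cap on steps per episode
alpha = 0.1                # Learning rate (assumed)
gamma = 0.99               # Discount factor (assumed)
epsilon = 0.1              # Exploration probability (assumed)
rewards_per_episode = []   # Reward history for later analysis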
# The main RL event loop
for i in range(num_episodes):
    S0 = env.reset()  # Current state
    total_reward_for_episode = 0
    game_done = False
    for s in range(steps_per_episode):
        # Choosing an action to perform in the current state (epsilon-greedy)
        if np.random.rand() < epsilon:
            action = env.action_space.sample()  # Explore with a random action
        else:
            # Choose the action with the highest Q value from the table for the given state
            action = np.argmax(q_table[S0, :])
        # Perform the action and get the reward and next state
        S1, R, game_done, info = env.step(action)
        # Update the table using the Bellman equation:
        # Q(S0, a) <- Q(S0, a) + alpha * (R + gamma * max_a' Q(S1, a') - Q(S0, a))
        q_table[S0, action] = q_table[S0, action] + alpha * (R + gamma * np.max(q_table[S1, :]) - q_table[S0, action])
        # Add the reward from this state to the total episode reward
        total_reward_for_episode += R
        # Move on to the next state and repeat
        S0 = S1
        if game_done:
            # Stop playing the game if the Agent completes the task or fails
            break
    # For further analysis
    rewards_per_episode.append(total_reward_for_episode)
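
# Quick diagnostic (not in the original gist): a hedged sketch that checks
# whether learning converged by averaging the reward over the last 100 episodes.
print("Mean reward over last 100 episodes:", np.mean(rewards_per_episode[-100:]))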