Last active
March 3, 2019 09:04
-
-
Save rish-16/3b3252c0945628e579496a3f426e1dcb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The main RL event loop: tabular Q-learning with an epsilon-greedy policy.
#
# Names read from the enclosing scope (defined outside this section):
#   env, q_table, num_episodes, steps_per_episode, epsilon, alpha, gamma,
#   rewards_per_episode, np.
# q_table is indexed as q_table[state, action].
# NOTE(review): assumes the classic Gym API, where env.reset() returns the
# state and env.step() returns a 4-tuple -- confirm against the installed
# gym version.
for i in range(num_episodes):
    S0 = env.reset()  # Current state
    total_reward_for_episode = 0
    game_done = False

    for s in range(steps_per_episode):
        # Choose an action for the current state (epsilon-greedy): explore
        # with probability epsilon, otherwise exploit the best known action.
        # NOTE(review): the original compared with `>`, which explored with
        # probability 1 - epsilon; confirm epsilon is the exploration rate.
        if np.random.rand() < epsilon:
            action = env.action_space.sample()  # Explore with a random action
        else:
            # Choose the action with the highest Q value for the given state
            action = np.argmax(q_table[S0, :])

        # Perform the action and get the reward and next state
        S1, R, game_done, info = env.step(action)

        # Update the table using the Bellman equation:
        #   Q(S0, a) <- Q(S0, a) + alpha * (R + gamma * max_a' Q(S1, a') - Q(S0, a))
        # The max is taken over the next state's row only; the current
        # estimate is subtracted outside both the max and the gamma factor.
        td_target = R + gamma * np.max(q_table[S1, :])
        q_table[S0, action] += alpha * (td_target - q_table[S0, action])

        # Add the reward from this step to the total episode reward
        total_reward_for_episode += R

        # Move on to the next state and repeat
        S0 = S1

        if game_done:
            # Stop playing the game if the agent completes the task or fails
            break

    # For further analysis
    rewards_per_episode.append(total_reward_for_episode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment