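# Setup (not in the original gist): a minimal sketch of the imports and
# variables the loop below assumes. The environment name 'FrozenLake-v0' and
# all hyperparameter values here are illustrative assumptions, not the
# author's original choices.
import gym
import numpy as np

env = gym.make('FrozenLake-v0')  # Assumed discrete-state Gym environment
q_table = np.zeros((env.observation_space.n, env.action_space.n))  # Q values, initialised to zero
num_episodes = 10000       # Assumed number of training episodes
steps_per_episode = 100    # Assumed cap on steps per episode
alpha = 0.1                # Learning rate (assumed)
gamma = 0.99               # Discount factor (assumed)
epsilon = 0.1              # Exploration probability (assumed)
rewards_per_episode = []   # Reward history for later analysis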
# The main RL event loop
for i in range(num_episodes):
    S0 = env.reset()  # Current state
    total_reward_for_episode = 0
    game_done = False
    for s in range(steps_per_episode):
        # Choosing an action to perform in the current state (epsilon-greedy)
        if np.random.rand() < epsilon:
            action = env.action_space.sample()  # Explore with a random action
        else:
            # Choose the action with the highest Q value from the table for the given state
            action = np.argmax(q_table[S0, :])
        # Perform the action and get the reward and next state
        S1, R, game_done, info = env.step(action)
        # Update the table using the Bellman equation:
        # Q(S0, a) <- Q(S0, a) + alpha * (R + gamma * max_a' Q(S1, a') - Q(S0, a))
        q_table[S0, action] = q_table[S0, action] + alpha * (R + gamma * np.max(q_table[S1, :]) - q_table[S0, action])
        # Add the reward from this state to the total episode reward
        total_reward_for_episode += R
        # Move on to the next state and repeat
        S0 = S1
        if game_done:
            # Stop playing the game if the Agent completes the task or fails
            break
    # For further analysis
    rewards_per_episode.append(total_reward_for_episode)
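
# Quick diagnostic (not in the original gist): a hedged sketch that checks
# whether learning converged by averaging the reward over the last 100 episodes.
print("Mean reward over last 100 episodes:", np.mean(rewards_per_episode[-100:]))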