Skip to content

Instantly share code, notes, and snippets.

@SolClover
Created October 16, 2022 06:49
Show Gist options
  • Save SolClover/d8f8e408d3c55c0dfcb6ffe0ea5a8dda to your computer and use it in GitHub Desktop.
Save SolClover/d8f8e408d3c55c0dfcb6ffe0ea5a8dda to your computer and use it in GitHub Desktop.
Function to evaluate an agent's performance
def evaluate_agent(n_max_steps, n_eval_episodes, Qtable):
    """Run greedy evaluation episodes and summarise the rewards collected.

    Relies on two names that must be defined at module level outside this
    function: ``env`` (the environment being evaluated) and ``eval_greedy``
    (called as ``eval_greedy(Qtable, state)`` to choose an action).

    NOTE(review): the unpacking of ``env.reset()`` into two values and
    ``env.step()`` into five matches the Gymnasium API
    ``(obs, reward, terminated, truncated, info)`` -- confirm that ``env``
    follows it.

    Args:
        n_max_steps: Upper bound on the number of steps taken per episode.
        n_eval_episodes: Number of evaluation episodes to run.
        Qtable: Q-table passed through unchanged to ``eval_greedy``.

    Returns:
        A tuple ``(mean_reward, std_reward, episode_rewards)``:
        ``episode_rewards`` is the list of total rewards, one entry per
        episode; ``mean_reward`` and ``std_reward`` are ``np.mean`` and
        ``np.std`` of that list.
    """
    # Total reward obtained in each evaluation episode.
    episode_rewards = []

    for _episode in range(n_eval_episodes):
        # Reset the environment at the start of each episode.
        state, _info = env.reset()
        tot_episode_reward = 0

        for _step in range(n_max_steps):
            # Use the greedy policy to evaluate.
            action = eval_greedy(Qtable, state)

            # Pass the action into the step function and move to the
            # resulting state.
            state, reward, terminated, truncated, _info = env.step(action)

            # Sum episode rewards.
            tot_episode_reward += reward

            # Finish the episode when the environment reports a terminal
            # state OR a truncation; stepping again after either signal
            # without a reset would mix in transitions from an episode
            # that is already over.
            if terminated or truncated:
                break

        episode_rewards.append(tot_episode_reward)

    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    return mean_reward, std_reward, episode_rewards
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment