Skip to content

Instantly share code, notes, and snippets.

@geffy
Last active November 27, 2016 20:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save geffy/a999ae87bd0f2835c6dd981f66b49606 to your computer and use it in GitHub Desktop.
import gym
import numpy as np
# Module-level environment; experiment() below reads this as a global.
env = gym.make('FrozenLake8x8-v0')
env.reset()
# policy obtained from solving MDP
# One action per state of the 8x8 grid, laid out row-major (64 entries).
# Values are presumably gym's FrozenLake action encoding
# (0=LEFT, 1=DOWN, 2=RIGHT, 3=UP) -- TODO confirm against the env spec.
policy = np.array(
[3, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 2,
0, 0, 3, 3, 3, 3, 3, 2,
0, 0, 0, 3, 2, 3, 2, 2,
0, 0, 3, 3, 3, 3, 2, 2,
0, 0, 0, 3, 2, 3, 2, 2,
0, 0, 0, 1, 2, 2, 2, 2,
0, 0, 1, 1, 3, 2, 2, 2
])
def experiment(monitor=False, n_steps_in_round=1000):
    """Roll out the precomputed policy for `n_rounds` episodes.

    Args:
        monitor: if True, record the run with gym's (legacy) Monitor.
        n_steps_in_round: hard cap on steps per episode.

    Returns:
        (cum_reward, stats): total reward accumulated over all episodes,
        and the [5, 25, 50, 75, 95] percentiles of episode lengths as ints.

    Relies on module-level globals: `env`, `policy`, `n_rounds`.
    """
    if monitor:
        # NOTE(review): env.monitor is the pre-2017 gym API; modern gym
        # replaced it with gym.wrappers (Monitor / RecordVideo). Kept as-is
        # since reproducing the monitor's effect is the point of this script.
        env.monitor.start('/tmp/frozenlake-reproduce', force=True)
    cum_reward = 0
    episode_lens = []
    for t_rounds in range(n_rounds):
        observation = env.reset()
        for t in range(n_steps_in_round):
            action = policy[observation]
            observation, reward, done, info = env.step(action)
            cum_reward += reward
            if done:
                episode_lens.append(t)
                break
    if monitor:
        env.monitor.close()
    # List comprehension instead of map(): under Python 3 map() returns a
    # lazy iterator, so the print(...) calls below would show
    # "<map object at ...>" instead of the percentile values.
    stats = [int(np.percentile(episode_lens, p)) for p in [5, 25, 50, 75, 95]]
    return cum_reward, stats
# Number of episodes per experiment; read as a global inside experiment().
n_rounds = 500
# Run the same policy twice -- once plain, once under the monitor -- to
# expose the reward discrepancy the monitor introduces.
reward_without, stats_without = experiment(monitor=False, n_steps_in_round=1000)
reward_with, stats_with = experiment(monitor=True, n_steps_in_round=1000)
print('\n')
print('Without monitor: {} / {}'.format(reward_without, n_rounds))
# Label fixed: was "precentiles" here and "percentile" below -- both
# normalized to "percentiles" so the two reports match.
print(' percentiles [5, 25, 50, 75, 95]: \n\t {}'.format(stats_without))
print('\n')
print('With monitor: {} / {}'.format(reward_with, n_rounds))
print(' percentiles [5, 25, 50, 75, 95]: \n\t {}'.format(stats_with))
# Output:
# Without monitor: 500.0 / 500
# percentiles [5, 25, 50, 75, 95]:
# [40, 65, 98, 145, 263]
# With monitor: 442.0 / 500
# percentile [5, 25, 50, 75, 95]:
# [38, 66, 98, 147, 199]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment