Skip to content

Instantly share code, notes, and snippets.

@geffy
Last active November 27, 2016 20:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save geffy/a999ae87bd0f2835c6dd981f66b49606 to your computer and use it in GitHub Desktop.
import gym
import numpy as np
# Module-level environment; experiment() below reads this as a global.
env = gym.make('FrozenLake8x8-v0')
env.reset()
# policy obtained from solving MDP
# One action per state of the 8x8 grid, laid out row-major (64 entries).
# Values are presumably gym's FrozenLake action encoding
# (0=LEFT, 1=DOWN, 2=RIGHT, 3=UP) -- TODO confirm against the env spec.
policy = np.array(
[3, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 2,
0, 0, 3, 3, 3, 3, 3, 2,
0, 0, 0, 3, 2, 3, 2, 2,
0, 0, 3, 3, 3, 3, 2, 2,
0, 0, 0, 3, 2, 3, 2, 2,
0, 0, 0, 1, 2, 2, 2, 2,
0, 0, 1, 1, 3, 2, 2, 2
])
def experiment(monitor=False, n_steps_in_round=1000):
    """Roll out the precomputed policy for `n_rounds` episodes.

    Args:
        monitor: if True, record the run with gym's (legacy) Monitor.
        n_steps_in_round: hard cap on steps per episode.

    Returns:
        (cum_reward, stats): total reward accumulated over all episodes,
        and the [5, 25, 50, 75, 95] percentiles of episode lengths as ints.

    Relies on module-level globals: `env`, `policy`, `n_rounds`.
    """
    if monitor:
        # NOTE(review): env.monitor is the pre-2017 gym API; modern gym
        # replaced it with gym.wrappers (Monitor / RecordVideo). Kept as-is
        # since reproducing the monitor's effect is the point of this script.
        env.monitor.start('/tmp/frozenlake-reproduce', force=True)
    cum_reward = 0
    episode_lens = []
    for t_rounds in range(n_rounds):
        observation = env.reset()
        for t in range(n_steps_in_round):
            action = policy[observation]
            observation, reward, done, info = env.step(action)
            cum_reward += reward
            if done:
                episode_lens.append(t)
                break
    if monitor:
        env.monitor.close()
    # List comprehension instead of map(): under Python 3 map() returns a
    # lazy iterator, so the print(...) calls below would show
    # "<map object at ...>" instead of the percentile values.
    stats = [int(np.percentile(episode_lens, p)) for p in [5, 25, 50, 75, 95]]
    return cum_reward, stats
# Number of episodes per experiment; read as a global inside experiment().
n_rounds = 500
# Run the same policy twice -- once plain, once under the monitor -- to
# expose the reward discrepancy the monitor introduces.
reward_without, stats_without = experiment(monitor=False, n_steps_in_round=1000)
reward_with, stats_with = experiment(monitor=True, n_steps_in_round=1000)
print('\n')
print('Without monitor: {} / {}'.format(reward_without, n_rounds))
# Label fixed: was "precentiles" here and "percentile" below -- both
# normalized to "percentiles" so the two reports match.
print(' percentiles [5, 25, 50, 75, 95]: \n\t {}'.format(stats_without))
print('\n')
print('With monitor: {} / {}'.format(reward_with, n_rounds))
print(' percentiles [5, 25, 50, 75, 95]: \n\t {}'.format(stats_with))
# Output:
# Without monitor: 500.0 / 500
# percentiles [5, 25, 50, 75, 95]:
# [40, 65, 98, 145, 263]
# With monitor: 442.0 / 500
# percentile [5, 25, 50, 75, 95]:
# [38, 66, 98, 147, 199]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment