Skip to content

Instantly share code, notes, and snippets.

@cyrilzakka
Last active May 3, 2018 19:18
Show Gist options
  • Save cyrilzakka/78f5be3d8b27cf03bc704a5f6f472276 to your computer and use it in GitHub Desktop.
Save cyrilzakka/78f5be3d8b27cf03bc704a5f6f472276 to your computer and use it in GitHub Desktop.
Frozen Lake Environment (OpenAI Gym) Solution using Random Policy
import numpy as np
import gym
import time
def generate_random_policy(n_states=16, n_actions=4):
    """Return a random deterministic policy as a 1-D integer array.

    Entry ``i`` of the result is the action to take in state ``i``, drawn
    uniformly from ``0 .. n_actions - 1``.

    Args:
        n_states: Number of states, i.e. the length of the returned vector.
            Defaults to 16.
        n_actions: Number of available actions. Defaults to 4.

    Returns:
        A NumPy array of shape ``(n_states,)`` with values in
        ``[0, n_actions)``.
    """
    # With the defaults this is a vector of shape (16,) holding an action
    # between 0 and 3 (inclusive) for every state.
    return np.random.choice(n_actions, size=n_states)
def run_episode(env, policy, n_episodes=100, render=False):
    """Play a single episode following ``policy`` and return its total reward.

    The environment is reset once, then stepped with the action
    ``policy[state]`` until it reports ``done`` or the step cap is reached.

    Args:
        env: Environment object exposing ``reset()``, ``step(action)`` and
            (when ``render`` is true) ``render()``. ``step`` must return a
            4-tuple ``(state, reward, done, info)``.
        policy: Indexable mapping from the state returned by ``env`` to the
            action to take in that state.
        n_episodes: Maximum number of steps taken within this one episode.
            NOTE: despite its name this is a step cap, not an episode
            count; the name is kept so existing callers keep working.
        render: If true, ``env.render()`` is called before every step.

    Returns:
        The sum of the rewards collected during the episode.
    """
    total_reward = 0
    # Reset the environment once, at the start of the episode.
    s = env.reset()
    for _ in range(n_episodes):
        if render:
            env.render()
        s, reward, done, _info = env.step(policy[s])
        total_reward += reward
        if done:
            break
    return total_reward
def evaluate_policy(env, policy, n_episodes=100):
    """Return the mean total reward of ``policy`` over ``n_episodes`` episodes.

    Each episode is played with ``run_episode(env, policy)`` using that
    function's default settings.

    Args:
        env: Environment passed through to ``run_episode``.
        policy: Policy passed through to ``run_episode``.
        n_episodes: Number of episodes to average over. Must be non-zero,
            otherwise the final division raises ``ZeroDivisionError``.

    Returns:
        The average per-episode total reward as a float.
    """
    # Start from 0.0 so the accumulated total is a float.
    total_rewards = sum(
        (run_episode(env, policy) for _ in range(n_episodes)), 0.0
    )
    return total_rewards / n_episodes
if __name__ == '__main__':
    # Random policy search: sample many random policies, score each one by
    # its average episode reward, and report the best score found together
    # with the wall-clock time the whole search took.
    env = gym.make('FrozenLake-v0')
    n_policies = 2000
    start = time.time()
    policy_set = [generate_random_policy() for _ in range(n_policies)]
    policy_score = [evaluate_policy(env, p) for p in policy_set]
    end = time.time()
    print("Best score = %0.2f. Time taken = %4.4f seconds" % (np.max(policy_score), end - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment