Skip to content

Instantly share code, notes, and snippets.

@Sathishruw
Created September 15, 2018 18:35
Show Gist options
  • Save Sathishruw/7684bdb0cfdc2120ce12ea4bf7113343 to your computer and use it in GitHub Desktop.
import time

import gym
import numpy
# Function for a random policy
def randomPolicy(n_states=16, n_actions=4):
    """Draw a random deterministic policy as a state -> action lookup table.

    Args:
        n_states: Number of entries in the table (one per state index).
            Defaults to 16, the size that was previously hard-coded.
        n_actions: Number of distinct actions; each entry is drawn uniformly
            from ``range(n_actions)``. Defaults to 4, the value that was
            previously hard-coded.

    Returns:
        A 1-D integer ``numpy`` array of length ``n_states`` whose values lie
        in ``[0, n_actions)``.
    """
    # NOTE(review): the defaults presumably match the 4x4 FrozenLake
    # environment used by the script (16 states, 4 moves) -- confirm if the
    # environment is ever changed.
    return numpy.random.choice(n_actions, size=n_states)
# Execution
def execute(env, policy, episode_len=100, render=False):
    """Run a single episode under ``policy`` and return the summed reward.

    Args:
        env: Environment object. ``env.reset()`` must return the initial
            observation and ``env.step(action)`` must return a 4-tuple
            ``(observation, reward, done, info)``.
        policy: Indexable mapping from observation to action
            (``policy[obs]`` is the action taken in state ``obs``).
        episode_len: Maximum number of steps before the episode is cut off.
        render: When true, call ``env.render()`` before every step.

    Returns:
        The sum of the rewards collected until ``done`` was reported or
        ``episode_len`` steps were taken (``0`` if no step was taken).
    """
    # The accumulator must carry the same name that is updated and returned
    # below; it was previously initialised under the name ``reward``, which
    # made the first ``+=`` raise UnboundLocalError.
    total_reward = 0
    obs = env.reset()
    for _ in range(episode_len):
        if render:
            env.render()
        action = policy[obs]
        obs, reward, done, _info = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward
# Evaluation
def optimalPolicy(env, policy, n_episodes=100):
    """Score ``policy`` by its mean episode reward over ``n_episodes`` runs.

    Despite its name, this function does not search for a policy; it only
    evaluates the one it is given.

    Args:
        env: Environment passed through to ``execute``.
        policy: Policy passed through to ``execute``.
        n_episodes: Number of episodes to average over. Must be non-zero,
            otherwise the final division raises ``ZeroDivisionError``.

    Returns:
        The total reward across all episodes divided by ``n_episodes``.
    """
    total_rewards = 0.0
    for _ in range(n_episodes):
        # ``execute`` is this file's episode runner; the previous call target
        # ``run_episode`` is not defined anywhere in the file.
        total_rewards += execute(env, policy)
    return total_rewards / n_episodes
# Script entry point: random search over policies on FrozenLake.
if __name__ == '__main__':
    env = gym.make('FrozenLake-v0')

    ## Policy search
    # Sample `maxIteration` random policies, score each one by its mean
    # episode reward, and report the best score together with the wall-clock
    # time spent generating and evaluating them.
    maxIteration = 1000
    start = time.time()
    policy_set = [randomPolicy() for _ in range(maxIteration)]
    policy_score = [optimalPolicy(env, p) for p in policy_set]
    end = time.time()
    print("Best score = %0.2f. Time taken = %4.4f seconds" %(numpy.max(policy_score) , end - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment