Last active
December 21, 2018 04:13
-
-
Save pranavkantgaur/0f828d53b0615ba7cd9a0741903902bd to your computer and use it in GitHub Desktop.
Random agent for Cart Pole environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import logging | |
import tempfile | |
import numpy as np | |
import gym | |
#from gym.wrappers.monitoring import Monitor | |
class RandomAgent(object):
    """Agent that ignores observations and samples actions uniformly at random."""

    def __init__(self, action_space):
        """Store the environment's action space.

        Args:
            action_space: a gym.spaces.discrete.Discrete instance. Other
                space types are rejected because this agent only knows how
                to sample discrete actions.

        Raises:
            TypeError: if action_space is not a Discrete space.
        """
        # Validate BEFORE storing, and raise instead of assert:
        # a bare `assert` is silently stripped when Python runs with -O.
        if not isinstance(action_space, gym.spaces.discrete.Discrete):
            raise TypeError('unsupported action space for now.')
        self.action_space = action_space

    def act(self, current_observation, last_reward, done):
        """Return a uniformly random action; all arguments are ignored.

        The (observation, reward, done) signature is kept so this agent is
        a drop-in stand-in for learning agents that do use them.
        """
        return self.action_space.sample()
if __name__ == '__main__':
    # Configure root logger so gym's internal messages are visible.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create the environment and a random agent over its action space.
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env.action_space)

    # Episode budget and per-episode step cap (CartPole-v0 ends at 200 steps).
    max_episodes = 500
    max_steps_per_episode = 200

    # Monitor(env, outdir + '/' + ENV_NAME, force = True)

    # Per-episode loop: sample an action, step the environment, accumulate
    # reward, and stop the episode when a terminal state is reached.
    # sum_reward_running is an exponential moving average of episode returns.
    sum_reward_running = 0.0
    current_reward = 0

    # range/parenthesized print work on both Python 2 and 3 (xrange and the
    # print statement were Python-2-only).
    for i in range(max_episodes):
        current_observation = env.reset()  # start a fresh episode
        sum_rewards = 0  # return accumulated within this episode
        last_reward = 0
        # Reset per episode: without this, the terminal flag of the previous
        # episode leaked into the first act() call of the next one.
        done = False

        for j in range(max_steps_per_episode):
            action = agent.act(current_observation, last_reward, done)
            next_observation, current_reward, done, _ = env.step(action)
            sum_rewards += current_reward
            if done:
                break
            current_observation = next_observation  # continue from the new state
            last_reward = current_reward

        # Exponentially weighted running average across all episodes so far.
        sum_reward_running = 0.95 * sum_reward_running + 0.05 * sum_rewards
        print('%d running reward: %f' % (i, sum_reward_running))

    # Just done playing all max_episodes game sessions.
    # dump monitor info to disk
    # env.monitor.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.