Last active
December 21, 2018 04:13
-
-
Save pranavkantgaur/0f828d53b0615ba7cd9a0741903902bd to your computer and use it in GitHub Desktop.
Random agent for Cart Pole environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import logging | |
import tempfile | |
import numpy as np | |
import gym | |
#from gym.wrappers.monitoring import Monitor | |
class RandomAgent(object):
    """Agent that ignores observations and samples actions uniformly at random."""

    def __init__(self, action_space):
        """Store the environment's action space.

        Args:
            action_space: a gym.spaces.discrete.Discrete instance. Other
                space types are rejected because this agent only knows how
                to sample discrete actions.

        Raises:
            TypeError: if action_space is not a Discrete space.
        """
        # Validate BEFORE storing, and raise instead of assert:
        # a bare `assert` is silently stripped when Python runs with -O.
        if not isinstance(action_space, gym.spaces.discrete.Discrete):
            raise TypeError('unsupported action space for now.')
        self.action_space = action_space

    def act(self, current_observation, last_reward, done):
        """Return a uniformly random action; all arguments are ignored.

        The (observation, reward, done) signature is kept so this agent is
        a drop-in stand-in for learning agents that do use them.
        """
        return self.action_space.sample()
if __name__ == '__main__':
    # Configure root logger so gym's internal messages are visible.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create the environment and a random agent over its action space.
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env.action_space)

    # Episode budget and per-episode step cap (CartPole-v0 ends at 200 steps).
    max_episodes = 500
    max_steps_per_episode = 200

    # Monitor(env, outdir + '/' + ENV_NAME, force = True)

    # Per-episode loop: sample an action, step the environment, accumulate
    # reward, and stop the episode when a terminal state is reached.
    # sum_reward_running is an exponential moving average of episode returns.
    sum_reward_running = 0.0
    current_reward = 0

    # range/parenthesized print work on both Python 2 and 3 (xrange and the
    # print statement were Python-2-only).
    for i in range(max_episodes):
        current_observation = env.reset()  # start a fresh episode
        sum_rewards = 0  # return accumulated within this episode
        last_reward = 0
        # Reset per episode: without this, the terminal flag of the previous
        # episode leaked into the first act() call of the next one.
        done = False

        for j in range(max_steps_per_episode):
            action = agent.act(current_observation, last_reward, done)
            next_observation, current_reward, done, _ = env.step(action)
            sum_rewards += current_reward
            if done:
                break
            current_observation = next_observation  # continue from the new state
            last_reward = current_reward

        # Exponentially weighted running average across all episodes so far.
        sum_reward_running = 0.95 * sum_reward_running + 0.05 * sum_rewards
        print('%d running reward: %f' % (i, sum_reward_running))

    # Just done playing all max_episodes game sessions.
    # dump monitor info to disk
    # env.monitor.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.