Skip to content

Instantly share code, notes, and snippets.

@pranavkantgaur
Last active December 21, 2018 04:13
Show Gist options
  • Save pranavkantgaur/0f828d53b0615ba7cd9a0741903902bd to your computer and use it in GitHub Desktop.
Save pranavkantgaur/0f828d53b0615ba7cd9a0741903902bd to your computer and use it in GitHub Desktop.
Random agent for Cart Pole environment
import os
import logging
import tempfile
import numpy as np
import gym
#from gym.wrappers.monitoring import Monitor
class RandomAgent(object):
    """Agent that selects a uniformly random action from a discrete action space.

    The agent ignores observations and rewards entirely; it exists as a
    trivial baseline policy for environments such as CartPole.
    """

    def __init__(self, action_space):
        # Only discrete action spaces are supported for now.
        assert isinstance(action_space, gym.spaces.discrete.Discrete), 'unsupported action space for now.'
        self.action_space = action_space

    def act(self, current_observation, last_reward, done):
        """Return a random action.

        current_observation, last_reward and done are accepted to match a
        generic agent interface but are intentionally unused.
        """
        return self.action_space.sample()
if __name__ == '__main__':
    # Configure root logger so gym's INFO messages are visible.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create the environment and the random baseline agent.
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env.action_space)

    # Maximum number of episodes to play, and a per-episode step cap
    # (CartPole-v0 itself terminates episodes at 200 steps).
    max_episodes = 500
    max_steps_per_episode = 200

    # Monitor(env, outdir + '/' + ENV_NAME, force = True)

    # Exponential moving average of per-episode reward across the run.
    sum_reward_running = 0.0
    current_reward = 0
    done = False

    for i in range(max_episodes):
        current_observation = env.reset()  # start a fresh episode
        sum_rewards = 0  # total reward accumulated in this episode
        last_reward = 0
        # Step until the episode reaches a terminal state or the step cap.
        for j in range(max_steps_per_episode):
            action = agent.act(current_observation, last_reward, done)
            # NOTE(review): assumes the classic gym API where step() returns a
            # 4-tuple; gymnasium returns 5 values (terminated/truncated).
            next_observation, current_reward, done, _ = env.step(action)
            sum_rewards += current_reward
            if done:
                break
            current_observation = next_observation  # continue current episode
            last_reward = current_reward
        # Smooth the reward signal: 0.95 decay on the running average.
        sum_reward_running = 0.95 * sum_reward_running + sum_rewards * 0.05
        print('%d running reward: %f' % (i, sum_reward_running))

    # Done playing all max_episodes game sessions.
    # dump monitor info to disk
    # env.monitor.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment