Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Continuous Cartpole for OpenAI Gym
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
Continuous version by Ian Danforth
"""
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
class ContinuousCartPoleEnv(gym.Env):
    """Cart-pole balancing environment with a continuous action.

    The state is the 4-tuple ``(x, x_dot, theta, theta_dot)``: cart position,
    cart velocity, pole angle and pole angular velocity.  The action is a
    single value in ``[min_action, max_action]`` that is multiplied by
    ``force_mag`` to obtain the force passed to the physics update.

    An episode is flagged as done once the cart position leaves
    ``[-x_threshold, x_threshold]`` or the pole angle leaves
    ``[-theta_threshold_radians, theta_threshold_radians]``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        """Set the physical constants, the spaces, the RNG and empty state."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 30.0
        self.tau = 0.02  # seconds between state updates
        self.min_action = -1.0
        self.max_action = 1.0

        # Angle at which to fail the episode (12 degrees, in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            # Build the bounds directly as float32 so Box does not have to
            # down-cast float64 bounds (and warn about lost precision).
            dtype=np.float32,
        )

        # The dtype is spelled out instead of relying on Box auto-detecting it.
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,),
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        """(Re)create the environment's random generator.

        Args:
            seed: Optional seed forwarded to ``seeding.np_random``.

        Returns:
            A one-element list holding the seed that was actually used.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def stepPhysics(self, force):
        """Advance the current state by one ``tau`` time step.

        Reads ``self.state`` but does not modify it; the caller stores the
        result.  Integration is explicit Euler: positions are advanced with
        the velocities from *before* this step.

        Args:
            force: Force term entering the cart acceleration equation.

        Returns:
            The new ``(x, x_dot, theta, theta_dot)`` tuple.
        """
        x, x_dot, theta, theta_dot = self.state
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (
            force + self.polemass_length * theta_dot * theta_dot * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length
            * (4.0 / 3.0 - self.masspole * costheta * costheta / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        return (x, x_dot, theta, theta_dot)

    def step(self, action):
        """Apply ``action`` for one time step.

        Args:
            action: A member of ``self.action_space`` (one value in
                ``[min_action, max_action]``).

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is the
            new state as an array, ``reward`` is 1.0 for every step up to and
            including the one that ends the episode and 0.0 afterwards,
            ``done`` is a ``bool`` and ``info`` is an empty dict.

        Raises:
            AssertionError: If ``action`` is not contained in the action space.
        """
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        # Extract the single element explicitly: calling float() on an array
        # with ndim > 0 is deprecated in NumPy.
        force = self.force_mag * float(np.asarray(action).item())
        self.state = self.stepPhysics(force)
        x, x_dot, theta, theta_dot = self.state
        done = x < -self.x_threshold \
            or x > self.x_threshold \
            or theta < -self.theta_threshold_radians \
            or theta > self.theta_threshold_radians
        done = bool(done)

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Warn only on the first step taken after the episode ended.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "\n"
                    "You are calling 'step()' even though this environment has already returned\n"
                    "done = True. You should always call 'reset()' once you receive 'done = True'\n"
                    "Any further steps are undefined behavior.\n"
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def reset(self):
        """Start a new episode.

        Returns:
            The initial state as an array; each of the four components is
            drawn uniformly from ``[-0.05, 0.05)``.
        """
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)

    def render(self, mode='human'):
        """Draw the cart, pole, axle and track for the current state.

        The viewer and its geometry are created lazily on the first call (and
        again after ``close()``).

        Args:
            mode: ``'human'`` or ``'rgb_array'``; the latter asks the viewer to
                return the frame as an array.

        Returns:
            ``None`` if no state exists yet (``reset()`` has not been called),
            otherwise whatever ``self.viewer.render`` returns.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width
        carty = 100  # TOP OF CART
        polewidth = 10.0
        # self.length is half the pole, so the drawn pole spans 2 * length
        # (equal to the previous hard-coded 1.0 for the default length of 0.5).
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.viewer is None:
            # Imported lazily so the rendering backend is only required when
            # something is actually drawn.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            left, right = -cartwidth / 2, cartwidth / 2
            top, bottom = cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon(
                [(left, bottom), (left, top), (right, top), (right, bottom)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)

            left, right = -polewidth / 2, polewidth / 2
            top, bottom = polelen - polewidth / 2, -polewidth / 2
            pole = rendering.FilledPolygon(
                [(left, bottom), (left, top), (right, top), (right, bottom)])
            pole.set_color(.8, .6, .4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            # The pole gets its own transform and then the cart's, so it
            # rotates about the axle and also follows the cart.
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)

            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(.5, .5, .8)
            self.viewer.add_geom(self.axle)

            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

        if self.state is None:
            return None

        x = self.state
        cartx = x[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-x[2])

        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))

    def close(self):
        """Close the viewer window, if one was opened.

        The reference is dropped afterwards so that a later ``render()`` call
        builds a fresh viewer instead of drawing into the closed window.
        """
        if self.viewer:
            self.viewer.close()
            self.viewer = None
@iandanforth

This comment has been minimized.

Copy link
Owner Author

@iandanforth iandanforth commented May 18, 2018

Exactly the same as CartPole except that the action space is now continuous from -1 to 1.

@chad-green

This comment has been minimized.

Copy link

@chad-green chad-green commented Jan 26, 2019

Thanks for posting this! What have you tried for training with the continuous input?

@being-aerys

This comment has been minimized.

Copy link

@being-aerys being-aerys commented May 13, 2019

Thanks for sharing. :)

@stratisMarkou

This comment has been minimized.

Copy link

@stratisMarkou stratisMarkou commented May 5, 2020

Thanks for sharing!

@DmitryBe

This comment has been minimized.

Copy link

@DmitryBe DmitryBe commented Jun 4, 2020

Thank you!

@vuk119

This comment has been minimized.

Copy link

@vuk119 vuk119 commented Jun 24, 2020

Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:

in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14

in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):

~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()

~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):

AttributeError: 'NoneType' object has no attribute 'set_current'

@misterguick

This comment has been minimized.

Copy link

@misterguick misterguick commented Feb 21, 2021

Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:

in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14

in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):

~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()

~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):

AttributeError: 'NoneType' object has no attribute 'set_current'

Hello,

I was able to correct the error by copy-pasting the render and close functions from the original (discrete) repo https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py.

Hope it helps.

Best regards.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment