Skip to content

Instantly share code, notes, and snippets.

@iandanforth
Last active March 19, 2024 14:53
Show Gist options
  • Star 34 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
  • Save iandanforth/e3ffb67cf3623153e968f2afdfb01dc8 to your computer and use it in GitHub Desktop.
Save iandanforth/e3ffb67cf3623153e968f2afdfb01dc8 to your computer and use it in GitHub Desktop.
Continuous Cartpole for OpenAI Gym
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
Continuous version by Ian Danforth
"""
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
class ContinuousCartPoleEnv(gym.Env):
    """Cart-pole balancing environment with a continuous action.

    The state is the 4-tuple ``(x, x_dot, theta, theta_dot)``: cart position,
    cart velocity, pole angle and pole angular velocity. The action is a
    single value in ``[min_action, max_action]`` that is multiplied by
    ``force_mag`` to obtain the force applied to the cart.

    An episode is reported as done once the cart position leaves
    ``[-x_threshold, x_threshold]`` or the pole angle leaves
    ``[-theta_threshold_radians, theta_threshold_radians]``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50
    }

    def __init__(self):
        """Set the physical constants, the spaces and the initial bookkeeping."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 30.0
        self.tau = 0.02  # seconds between state updates
        self.min_action = -1.0
        self.max_action = 1.0

        # Angle at which to fail the episode (12 degrees expressed in radians)
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        # dtype is passed explicitly so Box does not have to guess it (and
        # warn about it); float32 is what the omitted argument resolved to.
        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(1,),
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        """Create ``self.np_random`` from ``seed`` and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def stepPhysics(self, force):
        """Advance the current state by one ``tau`` step under ``force``.

        Reads ``self.state`` but does not modify it; the caller stores the
        result.

        Args:
            force: Force applied to the cart for this step.

        Returns:
            The new ``(x, x_dot, theta, theta_dot)`` tuple.
        """
        x, x_dot, theta, theta_dot = self.state
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (
            force + self.polemass_length * theta_dot * theta_dot * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length
            * (4.0 / 3.0 - self.masspole * costheta * costheta / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        # Positions are advanced with the velocities from the start of the
        # step, velocities with the accelerations computed above.
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        return (x, x_dot, theta, theta_dot)

    def step(self, action):
        """Apply ``action`` for one time step.

        Args:
            action: A value accepted by ``self.action_space`` (one element in
                ``[min_action, max_action]``).

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is the
            new state as an array, ``reward`` is 1.0 for every step up to and
            including the one that ends the episode and 0.0 for any step
            taken after that, ``done`` is a bool and ``info`` is an empty
            dict.

        Raises:
            AssertionError: If ``action`` is not contained in the action
                space.
        """
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        # Strip the np trappings. ``.item()`` is used instead of ``float()``
        # on the array because converting an array with ndim > 0 to a scalar
        # is deprecated in recent NumPy releases.
        force = self.force_mag * float(np.asarray(action).item())
        self.state = self.stepPhysics(force)
        x, x_dot, theta, theta_dot = self.state
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Warn only on the first step taken after the episode ended.
            if self.steps_beyond_done == 0:
                logger.warn("""
You are calling 'step()' even though this environment has already returned
done = True. You should always call 'reset()' once you receive 'done = True'
Any further steps are undefined behavior.
""")
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state), reward, done, {}

    def reset(self):
        """Start a new episode and return the initial observation.

        Each of the four state components is drawn uniformly from
        ``[-0.05, 0.05)`` using ``self.np_random``.
        """
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state)

    def render(self, mode='human'):
        """Draw the cart, pole, axle and track for the current state.

        The viewer and its geometry are created lazily on the first call (and
        again after ``close()``).

        Args:
            mode: ``'rgb_array'`` asks the viewer to return the frame as an
                array; any other value is passed on as a plain render.

        Returns:
            ``None`` if no state has been set yet, otherwise whatever
            ``self.viewer.render`` returns.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width
        carty = 100  # TOP OF CART
        polewidth = 10.0
        # self.length is half the pole's length; with the default 0.5 this is
        # the same as the previous hard-coded ``scale * 1.0``.
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)

            l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
            pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            pole.set_color(.8, .6, .4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)

            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(.5, .5, .8)
            self.viewer.add_geom(self.axle)

            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

        if self.state is None:
            return None

        state = self.state
        cartx = state[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-state[2])

        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))

    def close(self):
        """Close the viewer, if one was created, and forget it.

        Resetting ``self.viewer`` to ``None`` lets a later ``render()`` call
        build a fresh viewer instead of drawing into the closed one.
        """
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
@iandanforth
Copy link
Author

iandanforth commented May 18, 2018

Exactly the same as CartPole except that the action space is now continuous from -1 to 1.

@chad-green
Copy link

Thanks for posting this! What have you tried for training with the continuous input?

@being-aerys
Copy link

Thanks for sharing. :)

@stratisMarkou
Copy link

Thanks for sharing!

@DmitryBe
Copy link

DmitryBe commented Jun 4, 2020

Thank you!

@vuk119
Copy link

vuk119 commented Jun 24, 2020

Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:

in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14

in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):

~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()

~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):

AttributeError: 'NoneType' object has no attribute 'set_current'

@misterguick
Copy link

Nice work. There is a small problem with rendering. When you run an episode and use the render option for the first time everything is fine, however, if you try to run it again, you get the following error:

in play_optimally(model, render)
10
11 if render is True:
---> 12 env.render()
13 time.sleep(0.1)
14

in render(self, mode)
148 self.poletrans.set_rotation(-x[2])
149
--> 150 return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
151
152 def close(self):

~\miniconda3\envs\Torch\lib\site-packages\gym\envs\classic_control\rendering.py in render(self, return_rgb_array)
101 glClearColor(1,1,1,1)
102 self.window.clear()
--> 103 self.window.switch_to()
104 self.window.dispatch_events()
105 self.transform.enable()

~\miniconda3\envs\Torch\lib\site-packages\pyglet\window\win32\__init__.py in switch_to(self)
326
327 def switch_to(self):
--> 328 self.context.set_current()
329
330 def flip(self):

AttributeError: 'NoneType' object has no attribute 'set_current'

Hello,

I was able to correct the error by copy-pasting the render and close functions from the original (discrete) repo https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py.

Hope it helps.

Best regards.

@mhr
Copy link

mhr commented Oct 28, 2021

License please @iandanforth?

@bbradz
Copy link

bbradz commented Mar 19, 2024

Thank you for the great work!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment