Created
March 30, 2023 14:41
-
-
Save jeremybk21/cafafbbf541d8bbbbf2a47420bc33a4c to your computer and use it in GitHub Desktop.
Exact same as OpenAI Gymnasium cartpole environment except with a continuous action space. Good for learning control methods other than RL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math | |
from typing import Optional, Tuple, Union | |
import numpy as np | |
import gymnasium as gym | |
from gymnasium import logger, spaces | |
from gymnasium.envs.classic_control import utils | |
from gymnasium.error import DependencyNotInstalled | |
from gymnasium.experimental.vector import VectorEnv | |
from gymnasium.vector.utils import batch_space | |
class ContinuousActionCartPoleEnv(gym.Env[np.ndarray, Union[float, np.ndarray]]):
    """
    ## Description

    Cart-pole balancing task driven by a continuous, scalar action. The dynamics are those of the
    cart-pole problem described by Barto, Sutton, and Anderson in
    ["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
    A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
    The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
    in the left and right direction on the cart.

    ## Action Space

    The action is a scalar in `Box(-inf, inf, shape=(), dtype=float32)`. The force applied to the cart
    is `force_mag * action` (`force_mag` is 10.0), so:

    - negative values push the cart towards smaller `x` (left in the rendered view)
    - positive values push the cart towards larger `x` (right in the rendered view)

    The magnitude of the action is not bounded by the action space.

    **Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle
    the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it

    ## Observation Space

    The observation is a `ndarray` with shape `(4,)` and dtype `float32`:

    | Num | Observation           | Min                            | Max                           |
    |-----|-----------------------|--------------------------------|-------------------------------|
    | 0   | Cart Position         | -4.8                           | 4.8                           |
    | 1   | Cart Velocity         | -float32 max                   | float32 max                   |
    | 2   | Pole Angle            | -2 * `theta_threshold_radians` | 2 * `theta_threshold_radians` |
    | 3   | Pole Angular Velocity | -float32 max                   | float32 max                   |

    **Note:** The position and angle bounds are twice the termination thresholds so that the
    observation returned on the terminating step is still inside the observation space.

    ## Rewards

    A reward of `1.0` is returned for every step, including the terminating step. Steps taken after
    termination without calling `reset()` return `0.0` (and a warning is logged once).

    ## Starting State

    All four state variables are drawn uniformly from `(-0.05, 0.05)` unless other bounds are supplied
    through the `options` argument of `reset()`.

    ## Episode End

    1. Termination: the absolute pole angle exceeds `theta_threshold_radians`
       (`theta_threshold_degrees` converted to radians; 720 degrees by default).
    2. Termination: the absolute cart position exceeds 2.4.
    3. Truncation: never signalled by this class (`step` always returns `truncated=False`);
       apply a time limit externally if one is needed.

    ## Arguments

    - `render_mode`: `None`, `"human"` or `"rgb_array"`.
    - `theta_threshold_degrees`: pole angle, in degrees, beyond which the episode terminates.

    ```python
    env = ContinuousActionCartPoleEnv(render_mode="rgb_array")
    obs, info = env.reset(seed=0)
    obs, reward, terminated, truncated, info = env.step(np.array(0.5, dtype=np.float32))
    ```

    On reset, the `options` parameter allows the user to change the bounds used to determine
    the new random state.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 50,
    }

    def __init__(
        self,
        render_mode: Optional[str] = None,
        theta_threshold_degrees: float = 720.0,
    ):
        """Set up physical constants, spaces and rendering state.

        Args:
            render_mode: One of ``metadata["render_modes"]`` or ``None`` to disable rendering.
            theta_threshold_degrees: Absolute pole angle (degrees) at which the episode terminates.
        """
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0  # force applied per unit of action
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # The action is an unbounded scalar multiplier of force_mag.
        self.min_action = -np.inf
        self.max_action = np.inf

        # Angle at which to fail the episode.
        # NOTE(review): the default of 720 degrees preserves this file's existing behaviour;
        # the sibling CartPoleVectorEnv in this file uses 12 degrees -- confirm which is intended.
        self.theta_threshold_radians = theta_threshold_degrees * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Position/angle limits are set to twice the termination thresholds so a
        # failing observation is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Box(
            low=self.min_action,
            high=self.max_action,
            shape=(),
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.render_mode = render_mode

        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True
        self.state = None

        # None until the episode terminates, then counts the steps taken afterwards.
        self.steps_beyond_terminated = None

    def step(self, action):
        """Advance the simulation by one time step of ``tau`` seconds.

        Args:
            action: Scalar accepted by ``action_space``; the applied force is
                ``force_mag * float(action)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where the observation is a
            float32 array ``[x, x_dot, theta, theta_dot]``, ``truncated`` is always ``False``
            and ``info`` is an empty dict.

        Raises:
            AssertionError: If the action is not in the action space or ``reset`` was not called.
        """
        assert self.action_space.contains(
            action
        ), f"{action!r} ({type(action)}) invalid"
        assert self.state is not None, "Call reset before using step method."

        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag * float(action)
        costheta = math.cos(theta)
        sintheta = math.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot**2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == "euler":
            # Explicit Euler: positions are advanced with the velocities from before this step.
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            # Velocities are updated first and the new values are used for the positions.
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = (x, x_dot, theta, theta_dot)

        terminated = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not terminated:
            reward = 1.0
        elif self.steps_beyond_terminated is None:
            # Pole just fell! The terminating step is still rewarded.
            self.steps_beyond_terminated = 0
            reward = 1.0
        else:
            # Stepping a terminated episode: warn once, then keep returning zero reward.
            if self.steps_beyond_terminated == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned terminated = True. You "
                    "should always call 'reset()' once you receive 'terminated = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_terminated += 1
            reward = 0.0

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Start a new episode from a uniformly random state.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Forwarded to ``utils.maybe_parse_reset_bounds`` (defaults -0.05 and 0.05)
                to obtain the sampling bounds for the initial state.

        Returns:
            ``(observation, info)`` with a float32 observation of shape ``(4,)`` and an empty dict.
        """
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(
            options, -0.05, 0.05  # default low
        )  # default high
        self.state = self.np_random.uniform(low=low, high=high, size=(4,))
        self.steps_beyond_terminated = None

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), {}

    def render(self):
        """Draw the current state with pygame.

        Returns:
            In ``"rgb_array"`` mode, an array of the screen pixels transposed to
            ``(height, width, 3)``. Otherwise ``None`` (also when no render mode is set or
            ``reset`` has not been called yet).

        Raises:
            DependencyNotInstalled: If pygame cannot be imported.
        """
        if self.render_mode is None:
            # ``spec`` is only set when the env is created through a registry; this class is
            # typically constructed directly, so fall back to a constructor example.
            if self.spec is not None:
                example = f'gym.make("{self.spec.id}", render_mode="rgb_array")'
            else:
                example = f'{type(self).__name__}(render_mode="rgb_array")'
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f"e.g. {example}"
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError as e:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gymnasium[classic-control]`"
            ) from e

        # Lazily create the window (human) or off-screen surface (rgb_array).
        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array"
                self.screen = pygame.Surface((self.screen_width, self.screen_height))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.x_threshold * 2
        scale = self.screen_width / world_width  # pixels per unit of cart position
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.state is None:
            return None

        x = self.state

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        # Cart rectangle, centred on the cart position.
        l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
        axleoffset = cartheight / 4.0
        cartx = x[0] * scale + self.screen_width / 2.0  # MIDDLE OF CART
        carty = 100  # TOP OF CART
        cart_coords = [(l, b), (l, t), (r, t), (r, b)]
        cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
        gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))

        # Pole rectangle, rotated by the pole angle about the axle.
        l, r, t, b = (
            -polewidth / 2,
            polewidth / 2,
            polelen - polewidth / 2,
            -polewidth / 2,
        )
        pole_coords = []
        for coord in [(l, b), (l, t), (r, t), (r, b)]:
            coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
            coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
            pole_coords.append(coord)
        gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
        gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))

        # Axle.
        gfxdraw.aacircle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )
        gfxdraw.filled_circle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )

        # Track.
        gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))

        # The scene was drawn with y pointing up; flip vertically for the screen.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()
        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down pygame if a screen was created and mark the env as closed."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False
class CartPoleVectorEnv(VectorEnv):
    """Vectorised cart-pole with a discrete (left/right) action per sub-environment.

    ``num_envs`` independent cart-poles are simulated together. The internal state is an array
    of shape ``(4, num_envs)`` holding ``x, x_dot, theta, theta_dot`` row-wise; observations
    are returned transposed, i.e. with shape ``(num_envs, 4)``.

    Sub-environments whose episode ended (terminated, or ``max_episode_steps`` reached) are
    re-initialised inside ``step`` itself, so the observation returned for such a
    sub-environment is already the first observation of its next episode.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 50,
    }

    def __init__(
        self,
        num_envs: int = 2,
        max_episode_steps: int = 500,
        render_mode: Optional[str] = None,
    ):
        """Set up physical constants, batched spaces and rendering state.

        Args:
            num_envs: Number of cart-poles simulated in parallel.
            max_episode_steps: Step count at which a sub-environment is truncated.
            render_mode: One of ``metadata["render_modes"]`` or ``None`` to disable rendering.
        """
        super().__init__()
        self.num_envs = num_envs

        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        # Seconds between state updates; matches the sibling single env and 1 / render_fps.
        self.tau = 0.02
        self.kinematics_integrator = "euler"
        self.max_episode_steps = max_episode_steps

        # Per-sub-environment step counter used for truncation.
        self.steps = np.zeros(num_envs, dtype=np.int32)

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        # Bounds for sampling initial states; may be overridden through reset(options=...).
        self.low = -0.05
        self.high = 0.05

        self.action_space = batch_space(spaces.Discrete(2), num_envs)
        self.observation_space = batch_space(
            spaces.Box(-high, high, dtype=np.float32), num_envs
        )

        self.render_mode = render_mode

        self.screen_width = 600
        self.screen_height = 400
        self.screens = None
        self.clocks = None
        self.isopen = True
        self.state = None

        self.steps_beyond_terminated = None

    def _get_obs(self) -> np.ndarray:
        """Return a float32 copy of the state with shape ``(num_envs, 4)``."""
        # astype copies, so callers cannot mutate the internal state through the observation.
        return self.state.T.astype(np.float32)

    def step(
        self, action: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]:
        """Advance every sub-environment by one time step of ``tau`` seconds.

        Args:
            action: Array accepted by ``action_space``; per sub-environment, 0 applies
                ``-force_mag`` and 1 applies ``+force_mag``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. The observation has shape
            ``(num_envs, 4)`` and dtype float32, the reward is 1.0 for every sub-environment,
            ``terminated``/``truncated`` are boolean arrays and ``info`` is an empty dict.

        Raises:
            AssertionError: If the action is not in the action space or ``reset`` was not called.
        """
        assert self.action_space.contains(
            action
        ), f"{action!r} ({type(action)}) invalid"
        assert self.state is not None, "Call reset before using step method."

        x, x_dot, theta, theta_dot = self.state
        # Map actions {0, 1} to forces {-force_mag, +force_mag}.
        force = np.sign(action - 0.5) * self.force_mag
        costheta = np.cos(theta)
        sintheta = np.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot**2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == "euler":
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = np.stack((x, x_dot, theta, theta_dot))

        terminated: np.ndarray = (
            (x < -self.x_threshold)
            | (x > self.x_threshold)
            | (theta < -self.theta_threshold_radians)
            | (theta > self.theta_threshold_radians)
        )

        self.steps += 1
        truncated = self.steps >= self.max_episode_steps

        done = terminated | truncated
        if done.any():
            # Re-initialise only the finished sub-environments (columns selected by the mask).
            self.state[:, done] = self.np_random.uniform(
                low=self.low, high=self.high, size=(4, done.sum())
            ).astype(np.float32)
            self.steps[done] = 0

        reward = np.ones_like(terminated, dtype=np.float32)

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), reward, terminated, truncated, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Start a new episode in every sub-environment.

        Args:
            seed: Forwarded to the parent ``reset`` to seed ``self.np_random``.
            options: Forwarded to ``utils.maybe_parse_reset_bounds`` (defaults -0.05 and 0.05);
                the resulting bounds are also reused when sub-environments restart in ``step``.

        Returns:
            ``(observation, info)`` with a float32 observation of shape ``(num_envs, 4)``
            and an empty dict.
        """
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        self.low, self.high = utils.maybe_parse_reset_bounds(
            options, -0.05, 0.05  # default low
        )  # default high
        self.state = self.np_random.uniform(
            low=self.low, high=self.high, size=(4, self.num_envs)
        ).astype(np.float32)
        self.steps_beyond_terminated = None
        # A fresh episode must start counting from zero, otherwise truncation fires early.
        self.steps[:] = 0

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), {}

    def render(self):
        """Draw every sub-environment with pygame.

        Returns:
            In ``"rgb_array"`` mode, a list with one ``(height, width, 3)`` pixel array per
            sub-environment. Otherwise ``None`` (also when no render mode is set or ``reset``
            has not been called yet).

        Raises:
            DependencyNotInstalled: If pygame cannot be imported.
        """
        if self.render_mode is None:
            # ``spec`` may be unset when the env is constructed directly.
            if self.spec is not None:
                example = f'gym.make("{self.spec.id}", render_mode="rgb_array")'
            else:
                example = f'{type(self).__name__}(render_mode="rgb_array")'
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f"e.g. {example}"
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError as e:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gymnasium[classic-control]`"
            ) from e

        # Lazily create one target surface and one clock per sub-environment.
        if self.screens is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                # NOTE(review): every set_mode call targets the single pygame display, so
                # these entries presumably refer to the same window -- confirm.
                self.screens = [
                    pygame.display.set_mode((self.screen_width, self.screen_height))
                    for _ in range(self.num_envs)
                ]
            else:  # mode == "rgb_array"
                self.screens = [
                    pygame.Surface((self.screen_width, self.screen_height))
                    for _ in range(self.num_envs)
                ]
        if self.clocks is None:
            self.clocks = [pygame.time.Clock() for _ in range(self.num_envs)]

        world_width = self.x_threshold * 2
        scale = self.screen_width / world_width  # pixels per unit of cart position
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.state is None:
            return None

        # self.state is (4, num_envs); transpose so each iteration sees one env's 4-vector.
        for x, screen in zip(self.state.T, self.screens):
            self.surf = pygame.Surface((self.screen_width, self.screen_height))
            self.surf.fill((255, 255, 255))

            # Cart rectangle, centred on the cart position.
            l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cartx = x[0] * scale + self.screen_width / 2.0  # MIDDLE OF CART
            carty = 100  # TOP OF CART
            cart_coords = [(l, b), (l, t), (r, t), (r, b)]
            cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
            gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
            gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))

            # Pole rectangle, rotated by the pole angle about the axle.
            l, r, t, b = (
                -polewidth / 2,
                polewidth / 2,
                polelen - polewidth / 2,
                -polewidth / 2,
            )
            pole_coords = []
            for coord in [(l, b), (l, t), (r, t), (r, b)]:
                coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
                coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
                pole_coords.append(coord)
            gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
            gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))

            # Axle.
            gfxdraw.aacircle(
                self.surf,
                int(cartx),
                int(carty + axleoffset),
                int(polewidth / 2),
                (129, 132, 203),
            )
            gfxdraw.filled_circle(
                self.surf,
                int(cartx),
                int(carty + axleoffset),
                int(polewidth / 2),
                (129, 132, 203),
            )

            # Track.
            gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))

            # The scene was drawn with y pointing up; flip vertically for the screen.
            self.surf = pygame.transform.flip(self.surf, False, True)
            screen.blit(self.surf, (0, 0))

        if self.render_mode == "human":
            pygame.event.pump()
            for clock in self.clocks:
                clock.tick(self.metadata["render_fps"])
            pygame.display.flip()
        elif self.render_mode == "rgb_array":
            return [
                np.transpose(
                    np.array(pygame.surfarray.pixels3d(screen)), axes=(1, 0, 2)
                )
                for screen in self.screens
            ]

    def close(self):
        """Shut down pygame if screens were created and mark the env as closed."""
        if self.screens is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment