@araffin
Last active March 27, 2024 11:56
import gymnasium as gym
import numpy as np
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
# Env initialization
env = gym.make("Swimmer-v4", render_mode="human")
# Wrap to have reward statistics
env = gym.wrappers.RecordEpisodeStatistics(env)
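# Access the underlying MujocoEnv directly to read the joint positions and
# velocities (data.qpos / data.qvel) and the simulation timestep (dt)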
mujoco_env = env.unwrapped
n_joints = 2
assert isinstance(mujoco_env, MujocoEnv)
# PD Controller gains
kp, kd = 10, 0.5
# Reset the environment
t, _ = 0.0, env.reset(seed=0)
# Oscillators parameters
omega = 2 * np.pi * 0.62 * np.ones(n_joints)
phase = 2 * np.pi * np.array([0.00, 0.95])
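# Each joint i tracks q_des_i(t) = sin(omega_i * t + phase_i): both
# oscillators share the same ~0.62 Hz frequency and differ only by a fixed
# phase offset, which creates the undulation that propels the swimmer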
while True:
    env.render()
    # Open-Loop Control using oscillators
    desired_qpos = np.sin(omega * t + phase)
    # PD Control: convert to torque, desired qvel is zero
    desired_torques = (
        kp * (desired_qpos - mujoco_env.data.qpos[-n_joints:])
        - kd * mujoco_env.data.qvel[-n_joints:]
    )
    desired_torques = np.clip(desired_torques, -1.0, 1.0)  # clip to action bounds
    _, reward, terminated, truncated, info = env.step(desired_torques)
    t += mujoco_env.dt

    if terminated or truncated:
        print(f"Episode return: {float(info['episode']['r'].item()):.2f}")
        t, _ = 0.0, env.reset()
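
As a companion to the script above (not part of the original gist), the sketch below runs the same oscillator-based PD controller headless for a fixed number of episodes and reports the mean return. The run_episodes helper, its name, and its defaults are illustrative assumptions; the gains and oscillator parameters are copied from the script above.

import gymnasium as gym
import numpy as np


def run_episodes(n_episodes: int = 5, seed: int = 0) -> float:
    # Same setup as above, but without rendering
    env = gym.wrappers.RecordEpisodeStatistics(gym.make("Swimmer-v4"))
    mujoco_env = env.unwrapped
    n_joints = 2
    kp, kd = 10, 0.5
    omega = 2 * np.pi * 0.62 * np.ones(n_joints)
    phase = 2 * np.pi * np.array([0.00, 0.95])

    returns = []
    t, _ = 0.0, env.reset(seed=seed)
    while len(returns) < n_episodes:
        desired_qpos = np.sin(omega * t + phase)
        # PD control, clipped to the action bounds
        action = np.clip(
            kp * (desired_qpos - mujoco_env.data.qpos[-n_joints:])
            - kd * mujoco_env.data.qvel[-n_joints:],
            -1.0,
            1.0,
        )
        _, _, terminated, truncated, info = env.step(action)
        t += mujoco_env.dt
        if terminated or truncated:
            returns.append(float(info["episode"]["r"].item()))
            t, _ = 0.0, env.reset()
    env.close()
    return float(np.mean(returns))


print(f"Mean return: {run_episodes():.2f}")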