Skip to content

Instantly share code, notes, and snippets.

@alexlimh
Created May 14, 2020 22:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexlimh/5d7a7d538d8133a0cdc7a6f567ce8630 to your computer and use it in GitHub Desktop.
Save alexlimh/5d7a7d538d8133a0cdc7a6f567ce8630 to your computer and use it in GitHub Desktop.
3D fourroom
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
from dm_control import mujoco
from dm_control.rl import control
from dm_control.suite import base
from dm_control.suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import rewards
from dm_control.utils import containers
from dm_control.utils import xml_tools
from dm_control.utils import io as resources
import numpy as np
from lxml import etree
from six.moves import range
_DEFAULT_TIME_LIMIT = 100
SUITE = containers.TaggedTasks()
def get_model_and_assets():
"""Returns a tuple containing the model XML string and a dict of assets."""
return _make_model(), common.ASSETS
@SUITE.add()
def smooth(
time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
physics = Physics.from_xml_string(*get_model_and_assets())
task = FourRoom(random=random, sparse=False)
environment_kwargs = environment_kwargs or {}
return control.Environment(
physics, task,
time_limit=time_limit,
**environment_kwargs)
@SUITE.add()
def sparse(
time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
physics = Physics.from_xml_string(*get_model_and_assets())
task = FourRoom(random=random, sparse=True)
environment_kwargs = environment_kwargs or {}
return control.Environment(
physics, task,
time_limit=time_limit,
**environment_kwargs)
def _make_model():
"""Generates an xml string defining a cart with `n_poles` bodies."""
xml_string = common.read_model('threedfourroom.xml')
mjcf = etree.fromstring(xml_string)
return etree.tostring(mjcf, pretty_print=True)
class Physics(mujoco.Physics):
"""Physics simulation with additional features for the Reacher domain."""
def tool_to_target(self):
"""Returns the vector from target to finger in global coordinates."""
target = self.named.data.geom_xpos['target', :2]
tool = self.named.data.geom_xpos['toolgeom', :2]
return target - tool
def tool_to_target_dist(self):
"""Returns the signed distance between the finger and target surface."""
return np.linalg.norm(self.tool_to_target())
def camera_angle(self):
tool = self.named.data.geom_xpos['toolgeom', :2]
cam_target = self.named.data.geom_xpos['cam_target', :2]
delta = cam_target - tool
angle = -np.arctan2(delta[0], delta[1])
return angle
class FourRoom(base.Task):
"""A reacher `Task` to reach the target."""
def __init__(self, random=None, sparse=False):
"""Initialize an instance of `Reacher`.
Args:
target_size: A `float`, tolerance to determine whether finger reached the
target.
random: Optional, either a `numpy.random.RandomState` instance, an
integer seed for creating a new `RandomState`, or None to select a seed
automatically (default).
"""
super(FourRoom, self).__init__(random=random)
self._sparse = sparse
def initialize_episode(self, physics, difficulty=None):
"""Sets the state of the environment at the start of each episode."""
#randomizers.randomize_limited_and_rotational_joints(physics, self.random)
#w1, w2 = 0, 0
#physics.named.model.geom_pos['wall_1a', 'y'] = w1 + 0.6
#physics.named.model.geom_pos['wall_1b', 'y'] = w1 - 0.4
#physics.named.model.geom_pos['wall_2a', 'y'] = w2 + 0.3
#physics.named.model.geom_pos['wall_2b', 'y'] = w2 - 0.7
#physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-0.25, 0.25)
super(FourRoom, self).initialize_episode(physics)
def get_observation(self, physics):
"""Returns an observation of the state and the target position."""
obs = collections.OrderedDict()
obs['position'] = self.named.data.geom_xpos['toolgeom', :2] # physics.position()
obs['to_target'] = physics.tool_to_target()
obs['velocity'] = physics.velocity()
obs['camera_angle'] = physics.camera_angle()
return obs
def get_reward(self, physics):
if self._sparse:
radii = physics.named.model.geom_size[['target', 'toolgeom'], 0].sum()
return rewards.tolerance(physics.tool_to_target_dist(), (0, radii))
else:
max_distance = 0.67 # Estimated empirically.
cur_distance = physics.tool_to_target_dist()
return 1 - np.clip(cur_distance / max_distance, 0, 1)
<mujoco model="two-link planar reacher">
<include file="./common/skybox.xml"/>
<include file="./common/visual.xml"/>
<include file="./common/materials.xml"/>
<option timestep="0.1" />
<default>
<general ctrllimited="true"/>
<default class="tool">
<joint type="hinge" limited="false" range="-1 1" damping="2"/>
<geom material="self"/>
</default>
</default>
<worldbody>
<light name="light" pos="0 0 1"/>
<camera name="fixed" pos="0 0 1.5" quat="1 0 0 0"/>
<geom name="ground" rgba="0 0 0 1" type="plane" pos="0 0 0" size="1 1 1" material="grid"/>
<geom name="wall_n" type="box" pos=" 0 0.3 0.02" zaxis=" 0 -1 0" size="0.3 0.05 0.015" material="decoration"/>
<geom name="wall_e" type="box" pos=" 0.3 0 0.02" zaxis="-1 0 0" size="0.05 0.31 0.015" material="decoration"/>
<geom name="wall_s" type="box" pos=" 0 -0.3 0.02" zaxis=" 0 -1 0" size="0.3 0.05 0.015" material="decoration"/>
<geom name="wall_w" type="box" pos="-0.3 0 0.02" zaxis=" -1 0 0" size="0.05 0.31 0.015" material="decoration"/>
<geom name="wall_1a" type="box" pos="0 0.27 .02" zaxis="-1 0 0" size=".05 .03 .015" material="decoration" />
<geom name="wall_1b" type="box" pos="0.27 0 .02" zaxis="0 1 0" size=".03 .05 .015" material="decoration" />
<geom name="wall_2a" type="box" pos="0 -0.27 .02" zaxis="-1 0 0" size=".05 .03 .015" material="decoration" />
<geom name="wall_2b" type="box" pos="-0.27 0 .02" zaxis="0 1 0" size=".03 .05 .015" material="decoration" />
<geom name="wall_3a" type="box" pos=" 0 0 .02" zaxis="-1 0 0" size=".05 .15 .015" material="decoration" />
<geom name="wall_3b" type="box" pos=" 0 0 .02" zaxis="0 1 0" size=".15 .05 .015" material="decoration" />
<body name="cam_target" pos="0 0 0.02">
<joint name="cam_target_yaw" pos="0 0 0" axis="0 0 1"/>
<geom name="cam_target" rgba="0 0 0 0" type="box" size="0.018 0.018 0.02" pos="-0.55 0 0"/>
</body>
<body name="torso" pos="-0.25 -0.25 0.02" childclass="tool" >
<geom name="toolgeom" rgba="1 1 1 1" type="box" size="0.02 0.02 0.02" pos="0 0 0" />
<camera name="eyes" pos="0 0 0" xyaxes="1 0 0 0 0 1" mode="targetbodycom" target="cam_target"/>
<joint name="tool_x" type="slide" pos="0 0 0" axis="1 0 0" range="-1 1" damping="0.3" armature='0.0'/>
<joint name="tool_y" type="slide" pos="0 0 0" axis="0 1 0" range="-1 1" damping="0.3" armature='0.0'/>
</body>
<geom name="target" pos="0.25 0.25 0.03" material="target" type="sphere" size="0.03" contype="0" conaffinity="0"/>
</worldbody>
<actuator>
<motor name="x" joint="tool_x" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
<motor name="y" joint="tool_y" ctrlrange="-1 1" gear="0.05" ctrllimited="true" />
<motor name="cam_target" joint="cam_target_yaw" ctrlrange="-1 1" gear="0.05"/>
</actuator>
</mujoco>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment