Skip to content

Instantly share code, notes, and snippets.

@jeasinema
Last active October 16, 2018 04:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeasinema/3477b1327b1225789e946911ef6edd91 to your computer and use it in GitHub Desktop.
Wrap DeepMind dm_control into OpenAI gym
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# File Name : dm_control_wrapper.py
# Creation Date : 09-10-2018
# Created By : Jeasine Ma [jeasinema[at]gmail[dot]com]
import glfw
# Best-effort GLFW initialisation: on headless machines (no display) init()
# fails, but the environments themselves still run without on-screen
# rendering, so the failure is deliberately ignored.
# Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
# propagate instead of being silently swallowed.
try:
    glfw.init()
except Exception:
    pass
from dm_control import suite
from dm_control.suite import humanoid_CMU
import gym.spaces as spaces
from gym.envs.registration import EnvSpec
import numpy as np
import cv2
class Env_DM_Control(object):
    """Wrap a DeepMind ``dm_control`` task in an OpenAI-gym-style interface.

    The wrapped environment exposes the usual gym surface: ``reset``,
    ``step``, ``render``, ``seed``, ``close``, plus ``action_space`` /
    ``observation_space`` properties. Observations (a dict of arrays in
    dm_control) are flattened into a single 1-D ``np.ndarray``.

    Parameters
    ----------
    name : str
        Either the special value ``'Humanoid_CMU'`` or a ``'domain+task'``
        pair understood by ``dm_control.suite.load`` (e.g.
        ``'cartpole+swingup'``).
    img_size : int
        Height and width, in pixels, of images returned by ``render``.
    camera_id : str
        Camera passed to ``physics.render``; falsy values use the default.
    max_step : int
        Episode step budget; ``-1`` (default) means no artificial limit.
    """

    def __init__(self, name, img_size=84, camera_id='side', max_step=-1):
        self.env_name = name
        self.img_size = img_size
        self.camera_id = camera_id
        self.max_step = max_step
        if self.env_name == 'Humanoid_CMU':
            self.env = humanoid_CMU.run()
        else:
            # Task names are encoded as 'domain+task'.
            domain, task = self.env_name.split('+')
            self.env = suite.load(domain_name=domain, task_name=task)
        # Hoisted: the original queried action_spec() three times.
        action_spec = self.env.action_spec()
        # NOTE(review): only element 0 of the bound arrays is used, which
        # assumes uniform bounds across all action dimensions -- confirm
        # for tasks with per-dimension limits.
        self.control_min = action_spec.minimum[0]
        self.control_max = action_spec.maximum[0]
        self.control_shape = action_spec.shape
        self._action_space = spaces.Box(self.control_min, self.control_max, self.control_shape)
        # Total flattened observation length: scalars contribute 1 element.
        total_size = 0
        for _, spec in self.env.observation_spec().items():
            total_size += spec.shape[0] if len(spec.shape) > 0 else 1
        self._observation_space = spaces.Box(-np.inf, np.inf, (total_size,))
        self.step_count = 0
        self.reward_range = (-np.inf, np.inf)
        self.metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 67}
        # NOTE(review): spec is hard-coded to 'Humanoid-v2' regardless of the
        # wrapped task; kept for compatibility with callers that read it.
        self.spec = EnvSpec('Humanoid-v2', max_episode_steps=1000, timestep_limit=1000)

    def _flatten_obs(self, obs):
        """Concatenate a dm_control observation dict into one 1-D array.

        Scalar (0-d) entries are reshaped to length-1 vectors so that
        ``np.concatenate`` accepts them. Shared by ``reset`` and ``step``.
        """
        parts = []
        for _, value in obs.items():
            parts.append(value if len(value.shape) > 0 else value.reshape(1))
        return np.concatenate(parts)

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def physics(self):
        """Expose the underlying MuJoCo physics object (used by render)."""
        return self.env.physics

    def reset(self):
        """Reset the task and return the flattened initial observation."""
        return self._flatten_obs(self.env.reset().observation)

    def step(self, action):
        """Advance one step; return ``(state, reward, done, info)`` gym-style."""
        ret = self.env.step(action)
        state = self._flatten_obs(ret.observation)
        reward = ret.reward
        # step_type == 2 is dm_env StepType.LAST (episode terminated by the
        # task); the second clause enforces the wrapper's own step budget.
        # NOTE(review): the budget is compared before incrementing, so with
        # max_step = k the episode ends after k steps counted from 0 -- confirm
        # the intended off-by-one convention.
        done = (ret.step_type == 2) or (self.step_count == self.max_step)
        info = {}
        self.step_count += 1
        if done:
            self.step_count = 0
        return state, reward, done, info

    def render(self):
        """Return an ``img_size`` x ``img_size`` RGB frame from the physics."""
        height = width = self.img_size
        camera_id = self.camera_id
        if camera_id:
            img = self.env.physics.render(height, width, camera_id=camera_id)
        else:
            img = self.env.physics.render(height, width)
        return img

    def seed(self, seed):
        """Re-create the underlying task with a fixed RNG seed.

        Note this replaces ``self.env`` entirely; any episode in progress
        is discarded.
        """
        if self.env_name == 'Humanoid_CMU':
            self.env = humanoid_CMU.run(random=seed)
        else:
            domain, task = self.env_name.split('+')
            self.env = suite.load(domain_name=domain, task_name=task, task_kwargs={'random': seed})

    def close(self):
        """No resources to release; present for gym API compatibility."""
        pass
if __name__ == '__main__':
    # Smoke test: build a cartpole swingup task and render forever.
    # Bug fix: the wrapper splits the name on '+', so the original
    # 'cartpole/swingup' would raise ValueError in __init__.
    env = Env_DM_Control('cartpole+swingup')
    env.reset()
    print(env.action_space)
    print(env.observation_space)
    while True:
        env.render()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment