alexlimh/Minecraft.md

## Minecraft.md

      
    Raw
  

              Minecraft.md
            
          
    Running Minecraft on a Headless Linux System

Prerequisites


xpra
sudo apt-get install xpra


jdk 1.8
sudo add-apt-repository ppa:openjdk-r/ppa
sudo apt-get update
sudo apt-get install openjdk-8-jdk


minerl
pip install --upgrade minerl


Setup Your Working Environment


Modify the environment variables in the minerl library (to avoid gradle lock)


Open /path/to/minerl/env/malmo.py; it should be somewhere inside the virtual environment created by anaconda or pip.


Make the following modifications to lines 86-89 of malmo.py (comment out the original definitions and read the paths from environment variables instead):
 # MINECRAFT_DIR = os.path.join(os.path.dirname(__file__), 'Malmo', 'Minecraft')
 # SCHEMAS_DIR = os.path.join(os.path.dirname(__file__), 'Malmo', 'Schemas')
 # STATUS_DIR = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), 'performance')
 
 MINECRAFT_DIR = os.environ['MINECRAFT_DIR']
 SCHEMAS_DIR = os.environ['SCHEMAS_DIR']
 STATUS_DIR = os.environ['STATUS_DIR']


Build Minecraft


Download Malmo to your home directory (or anywhere you like)
git clone -b 0.37.0 https://github.com/microsoft/malmo.git /path/to/Malmo


build Minecraft
cd /path/to/Malmo/Minecraft

xpra start :10 
export DISPLAY=:10
# the display number (the "port" after the colon) must be 10 or greater; use a different one for each experiment
# if this doesn't work, try 'xpra start --systemd-run=no :10'

./launchClient.sh


If the build succeeds, the output will include a line such as "[Build 95%] Client Dormant..." or something similar.


Terminate the program if successfully built.


Copy the Malmo working directory into the run directory of each experiment
cp -r /path/to/Malmo /path/to/your/logdir


Launch Minecraft


Launch xpra and set environment variables
xpra start :10 
export DISPLAY=:10
# the display number (the "port" after the colon) must be 10 or greater; use a different one for each experiment
# if this doesn't work, try 'xpra start --systemd-run=no :10'

export GRADLE_USER_HOME=/path/to/your/logdir/Malmo/Minecraft
export MINECRAFT_DIR=/path/to/your/logdir/Malmo/Minecraft
export SCHEMAS_DIR=/path/to/your/logdir/Malmo/Schemas
export STATUS_DIR=/path/to/your/logdir/performance


Run your script

If the program gets stuck at 'train: Creating Environment...'

that means the Minecraft server failed to launch.
Cancel the command, wait about 60 s, and relaunch the experiment; repeat until the environment is created.


## wrappers.py
class Minecraft:
  """Discrete-action wrapper around a MineRL gym environment.

  Each integer action indexes a row of the task's action set; the row is
  zipped with the task's action keys to build the dict passed to the wrapped
  environment. Observations are reduced to the 'image', 'compassAngle' and
  'inventory' entries (whichever the wrapped environment provides).

  The wrapper tracks the accumulated camera movement per axis and limits it to
  +/- ``_camera_clip`` by shrinking camera deltas that would exceed the limit.
  """

  # Order of the entries in each ACTION_SET_CHOP row.
  ACTION_KEYS_CHOP = ('attack', 'back', 'camera', 'forward', 'jump', 'left', 'right')
  ACTION_SET_CHOP = (
      (1, 0, (0, 0), 0, 0, 0, 0),    # Attack
      (0, 1, (0, 0), 0, 0, 0, 0),    # Back
      (0, 0, (-10, 0), 0, 0, 0, 0),  # Look Left
      (0, 0, (10, 0), 0, 0, 0, 0),   # Look Right
      (0, 0, (0, -10), 0, 0, 0, 0),  # Look Down
      (0, 0, (0, 10), 0, 0, 0, 0),   # Look Up
      (0, 0, (0, 0), 1, 0, 0, 0),    # Forward
      (0, 0, (0, 0), 1, 1, 0, 0),    # Jump + Forward
      (0, 0, (0, 0), 0, 0, 1, 0),    # Left
      (0, 0, (0, 0), 0, 0, 0, 1),    # Right
  )

  # Order of the entries in each ACTION_SET_NAVIGATE row.
  ACTION_KEYS_NAVIGATE = ('attack', 'back', 'camera', 'forward', 'jump', 'left', 'place', 'right')
  ACTION_SET_NAVIGATE = (
      (1, 0, (0, 0), 0, 0, 0, 0, 0),    # Attack
      (0, 1, (0, 0), 0, 0, 0, 0, 0),    # Back
      (0, 0, (-10, 0), 0, 0, 0, 0, 0),  # Look Left
      (0, 0, (10, 0), 0, 0, 0, 0, 0),   # Look Right
      (0, 0, (0, -10), 0, 0, 0, 0, 0),  # Look Down
      (0, 0, (0, 5), 0, 0, 0, 0, 0),   # Look Up
      (0, 0, (0, 0), 1, 0, 0, 0, 0),    # Forward
      (0, 0, (0, 0), 1, 1, 0, 0, 0),    # Jump + Forward
      (0, 0, (0, 0), 0, 0, 1, 0, 0),    # Left
      (0, 0, (0, 0), 0, 0, 0, 1, 0),    # Place
      (0, 0, (0, 0), 0, 0, 0, 0, 1),    # Right
  )

  # Task id -> (action keys, action set). Tasks mapped to None have no
  # discrete action set defined here and cannot be wrapped.
  ACTION_SET_KEYS = {'MineRLTreechop-v0': (ACTION_KEYS_CHOP, ACTION_SET_CHOP),
                     'MineRLNavigateDense-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
                     'MineRLNavigate-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
                     'MineRLNavigateExtremeDense-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
                     'MineRLNavigateExtreme-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
                     'MineRLObtainIronPickaxe-v0': None,
                     'MineRLObtainIronPickaxeDense-v0': None,
                     'MineRLObtainDiamond-v0': None,
                     'MineRLObtainDiamondDense-v0': None}

  def __init__(
      self, task, mode, size=(84, 84), action_repeat=1,
      buffer_size=1024, seed=0, attack_repeat=15, action_set_keys=ACTION_SET_KEYS):
    """Create and seed the wrapped environment.

    Args:
      task: gym environment id; also the key looked up in `action_set_keys`.
      mode: label used only as a prefix in the progress messages.
      size: target size handed to `cv2.resize` for the 'pov' image.
      action_repeat: number of env steps per non-attack action (>= 1).
      buffer_size: accepted for interface compatibility; not used here.
      seed: seed passed to the wrapped environment.
      attack_repeat: number of env steps per attack action (>= 1).
      action_set_keys: mapping task -> (action keys, action set).

    Raises:
      ValueError: if a repeat count is below 1, or if `task` is mapped to
        None in `action_set_keys`.
      KeyError: if `task` is missing from `action_set_keys`.
    """
    # Reject bad configuration before the environment is created, so the
    # failure is immediate and the message names the actual problem.
    if action_repeat < 1 or attack_repeat < 1:
      raise ValueError(
          f"action_repeat and attack_repeat must be >= 1, "
          f"got {action_repeat} and {attack_repeat}")
    if task in action_set_keys and action_set_keys[task] is None:
      raise ValueError(f"No discrete action set is defined for task {task!r}")

    # NOTE(review): minerl is not referenced below; presumably it is imported
    # for its side effect of registering the MineRL ids with gym -- confirm.
    import minerl
    import gym
    import time
    print(f"{mode}: Creating Environment... ")
    start = time.time()
    self._env = gym.make(task)
    self._env.seed(seed)
    print(f"{mode}: {task} Created: {time.time()-start:.2f}s")

    self._task = task
    self._mode = mode
    self._size = size
    self._seed = seed

    self._action_repeat = action_repeat
    self._attack_repeat = attack_repeat
    self._action_keys, self._action_set = action_set_keys[task][0], action_set_keys[task][1]

    # Accumulated camera movement per axis and its symmetric limit.
    self._camera_angle = np.array([0.0, 0.0])
    self._camera_clip = 60.0

  def _mc_action(self, raw_action):
    """Map a discrete action index to an ordered action dict.

    The 'camera' entry is converted to a float32 array and shrunk where
    needed so the accumulated camera angle stays within +/- `_camera_clip`;
    the accumulated angle is updated as a side effect.
    """
    mc_actions = self._action_set[raw_action]
    act_dict = collections.OrderedDict()
    for k, v in zip(self._action_keys, mc_actions):
      if k == 'camera':
        v = np.array(v, dtype=np.float32)
        target = self._camera_angle + v
        new_camera_angle = np.clip(target, -self._camera_clip, self._camera_clip)
        cond = np.abs(target) >= self._camera_clip
        if cond.any():
          # Apply exactly the movement that reaches the clipped angle, so the
          # tracked angle and the delta sent to the env can never disagree
          # (even for a step large enough to cross zero).
          v[cond] = (new_camera_angle - self._camera_angle)[cond]
        self._camera_angle = new_camera_angle
      act_dict[k] = v
    return act_dict

  def _vec_obs(self, mc_obs):
    """Convert an observation dict from the env into arrays.

    'pov' becomes a resized 'image'; 'compassAngle' becomes a one-element
    float32 array; 'inventory' becomes a float32 array of the dict's values.
    Any other key is dropped.
    """
    obs = {}
    for k, v in mc_obs.items():
      if k == 'pov':
        obs['image'] = cv2.resize(v, self._size, interpolation=cv2.INTER_AREA)
      elif k == 'compassAngle':
        obs['compassAngle'] = np.array([v], dtype=np.float32)
      elif k == 'inventory':
        obs['inventory'] = np.array(list(v.values()), dtype=np.float32)
    return obs

  @property
  def observation_space(self):
    """Plain dict of gym spaces for the keys produced by `_vec_obs`."""
    observation_space = self._env.observation_space
    spaces = {}
    for key, value in observation_space.items():
      if key == 'pov':
        spaces['image'] = gym.spaces.Box(
            0, 255, self._size + (3,), dtype=np.uint8)
      elif key == 'inventory':
        spaces['inventory'] = gym.spaces.Discrete(len(value))
      elif key == 'compassAngle':
        spaces['compassAngle'] = gym.spaces.Box(
            -180.0, 180.0, value.shape, dtype=np.float32)
    return spaces

  @property
  def action_space(self):
    """Discrete space with one entry per row of the task's action set."""
    return gym.spaces.Discrete(len(self._action_set))

  def close(self):
    """Close the wrapped environment."""
    self._env.close()

  def reset(self):
    """Reset the wrapped environment and the tracked camera angle."""
    obs = self._vec_obs(self._env.reset())
    self._camera_angle = np.array([0.0, 0.0])
    return obs

  def step(self, action):
    """Apply one action, repeating it, and return the summed reward.

    `action` is either an index into the action set or an already-built
    OrderedDict. Attack actions repeat `_attack_repeat` times, 'place'
    actions run once, and everything else repeats `_action_repeat` times.
    Repetition stops early when the env reports `done`.

    Returns:
      (obs, rewards, done, info) where obs is the converted last observation
      and rewards is the sum over the executed repeats.
    """
    if not isinstance(action, collections.OrderedDict):
      action = self._mc_action(action)
    repeats = self._attack_repeat if action['attack'] == 1 else self._action_repeat
    if 'place' in action and action['place'] > 0:
      repeats = 1
    rewards = 0
    for _ in range(repeats):
      obs, reward, done, info = self._env.step(action)
      rewards += reward
      if done:
        break
    obs = self._vec_obs(obs)
    return obs, rewards, done, info

  def render(self):
    """Rendering is not supported by this wrapper."""
    raise NotImplementedError("Minecraft's Rendering Not Implemented")
	class Minecraft:
		"""Discrete-action wrapper around a MineRL gym environment.

		Each integer action indexes a row of the task's action set; the row is
		zipped with the task's action keys to build the dict passed to the wrapped
		environment. Observations are reduced to the 'image', 'compassAngle' and
		'inventory' entries (whichever the wrapped environment provides).

		The wrapper tracks the accumulated camera movement per axis and limits it
		to +/- ``_camera_clip`` by shrinking camera deltas that would exceed it.
		"""

		# Order of the entries in each ACTION_SET_CHOP row.
		ACTION_KEYS_CHOP = ('attack', 'back', 'camera', 'forward', 'jump', 'left', 'right')
		ACTION_SET_CHOP = (
			(1, 0, (0, 0), 0, 0, 0, 0), # Attack
			(0, 1, (0, 0), 0, 0, 0, 0), # Back
			(0, 0, (-10, 0), 0, 0, 0, 0), # Look Left
			(0, 0, (10, 0), 0, 0, 0, 0), # Look Right
			(0, 0, (0, -10), 0, 0, 0, 0), # Look Down
			(0, 0, (0, 10), 0, 0, 0, 0), # Look Up
			(0, 0, (0, 0), 1, 0, 0, 0), # Forward
			(0, 0, (0, 0), 1, 1, 0, 0), # Jump + Forward
			(0, 0, (0, 0), 0, 0, 1, 0), # Left
			(0, 0, (0, 0), 0, 0, 0, 1), # Right
		)

		# Order of the entries in each ACTION_SET_NAVIGATE row.
		ACTION_KEYS_NAVIGATE = ('attack', 'back', 'camera', 'forward', 'jump', 'left', 'place', 'right')
		ACTION_SET_NAVIGATE = (
			(1, 0, (0, 0), 0, 0, 0, 0, 0), # Attack
			(0, 1, (0, 0), 0, 0, 0, 0, 0), # Back
			(0, 0, (-10, 0), 0, 0, 0, 0, 0), # Look Left
			(0, 0, (10, 0), 0, 0, 0, 0, 0), # Look Right
			(0, 0, (0, -10), 0, 0, 0, 0, 0), # Look Down
			(0, 0, (0, 5), 0, 0, 0, 0, 0), # Look Up
			(0, 0, (0, 0), 1, 0, 0, 0, 0), # Forward
			(0, 0, (0, 0), 1, 1, 0, 0, 0), # Jump + Forward
			(0, 0, (0, 0), 0, 0, 1, 0, 0), # Left
			(0, 0, (0, 0), 0, 0, 0, 1, 0), # Place
			(0, 0, (0, 0), 0, 0, 0, 0, 1), # Right
		)

		# Task id -> (action keys, action set). Tasks mapped to None have no
		# discrete action set defined here and cannot be wrapped.
		ACTION_SET_KEYS = {'MineRLTreechop-v0': (ACTION_KEYS_CHOP, ACTION_SET_CHOP),
			'MineRLNavigateDense-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
			'MineRLNavigate-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
			'MineRLNavigateExtremeDense-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
			'MineRLNavigateExtreme-v0': (ACTION_KEYS_NAVIGATE, ACTION_SET_NAVIGATE),
			'MineRLObtainIronPickaxe-v0': None,
			'MineRLObtainIronPickaxeDense-v0': None,
			'MineRLObtainDiamond-v0': None,
			'MineRLObtainDiamondDense-v0': None}

		def __init__(
				self, task, mode, size=(84, 84), action_repeat=1,
				buffer_size=1024, seed=0, attack_repeat=15, action_set_keys=ACTION_SET_KEYS):
			"""Create and seed the wrapped environment.

			Args:
				task: gym environment id; also the key looked up in `action_set_keys`.
				mode: label used only as a prefix in the progress messages.
				size: target size handed to `cv2.resize` for the 'pov' image.
				action_repeat: number of env steps per non-attack action (>= 1).
				buffer_size: accepted for interface compatibility; not used here.
				seed: seed passed to the wrapped environment.
				attack_repeat: number of env steps per attack action (>= 1).
				action_set_keys: mapping task -> (action keys, action set).

			Raises:
				ValueError: if a repeat count is below 1, or if `task` is mapped to
					None in `action_set_keys`.
				KeyError: if `task` is missing from `action_set_keys`.
			"""
			# Reject bad configuration before the environment is created, so the
			# failure is immediate and the message names the actual problem.
			if action_repeat < 1 or attack_repeat < 1:
				raise ValueError(
					f"action_repeat and attack_repeat must be >= 1, "
					f"got {action_repeat} and {attack_repeat}")
			if task in action_set_keys and action_set_keys[task] is None:
				raise ValueError(f"No discrete action set is defined for task {task!r}")

			# NOTE(review): minerl is not referenced below; presumably it is imported
			# for its side effect of registering the MineRL ids with gym -- confirm.
			import minerl
			import gym
			import time
			print(f"{mode}: Creating Environment... ")
			start = time.time()
			self._env = gym.make(task)
			self._env.seed(seed)
			print(f"{mode}: {task} Created: {time.time()-start:.2f}s")

			self._task = task
			self._mode = mode
			self._size = size
			self._seed = seed

			self._action_repeat = action_repeat
			self._attack_repeat = attack_repeat
			self._action_keys, self._action_set = action_set_keys[task][0], action_set_keys[task][1]

			# Accumulated camera movement per axis and its symmetric limit.
			self._camera_angle = np.array([0.0, 0.0])
			self._camera_clip = 60.0

		def _mc_action(self, raw_action):
			"""Map a discrete action index to an ordered action dict.

			The 'camera' entry is converted to a float32 array and shrunk where
			needed so the accumulated camera angle stays within +/- `_camera_clip`;
			the accumulated angle is updated as a side effect.
			"""
			mc_actions = self._action_set[raw_action]
			act_dict = collections.OrderedDict()
			for k, v in zip(self._action_keys, mc_actions):
				if k == 'camera':
					v = np.array(v, dtype=np.float32)
					target = self._camera_angle + v
					new_camera_angle = np.clip(target, -self._camera_clip, self._camera_clip)
					cond = np.abs(target) >= self._camera_clip
					if cond.any():
						# Apply exactly the movement that reaches the clipped angle, so
						# the tracked angle and the delta sent to the env can never
						# disagree (even for a step large enough to cross zero).
						v[cond] = (new_camera_angle - self._camera_angle)[cond]
					self._camera_angle = new_camera_angle
				act_dict[k] = v
			return act_dict

		def _vec_obs(self, mc_obs):
			"""Convert an observation dict from the env into arrays.

			'pov' becomes a resized 'image'; 'compassAngle' becomes a one-element
			float32 array; 'inventory' becomes a float32 array of the dict's values.
			Any other key is dropped.
			"""
			obs = {}
			for k, v in mc_obs.items():
				if k == 'pov':
					obs['image'] = cv2.resize(v, self._size, interpolation=cv2.INTER_AREA)
				elif k == 'compassAngle':
					obs['compassAngle'] = np.array([v], dtype=np.float32)
				elif k == 'inventory':
					obs['inventory'] = np.array(list(v.values()), dtype=np.float32)
			return obs

		@property
		def observation_space(self):
			"""Plain dict of gym spaces for the keys produced by `_vec_obs`."""
			observation_space = self._env.observation_space
			spaces = {}
			for key, value in observation_space.items():
				if key == 'pov':
					spaces['image'] = gym.spaces.Box(
						0, 255, self._size + (3,), dtype=np.uint8)
				elif key == 'inventory':
					spaces['inventory'] = gym.spaces.Discrete(len(value))
				elif key == 'compassAngle':
					spaces['compassAngle'] = gym.spaces.Box(
						-180.0, 180.0, value.shape, dtype=np.float32)
			return spaces

		@property
		def action_space(self):
			"""Discrete space with one entry per row of the task's action set."""
			return gym.spaces.Discrete(len(self._action_set))

		def close(self):
			"""Close the wrapped environment."""
			self._env.close()

		def reset(self):
			"""Reset the wrapped environment and the tracked camera angle."""
			obs = self._vec_obs(self._env.reset())
			self._camera_angle = np.array([0.0, 0.0])
			return obs

		def step(self, action):
			"""Apply one action, repeating it, and return the summed reward.

			`action` is either an index into the action set or an already-built
			OrderedDict. Attack actions repeat `_attack_repeat` times, 'place'
			actions run once, and everything else repeats `_action_repeat` times.
			Repetition stops early when the env reports `done`.

			Returns:
				(obs, rewards, done, info) where obs is the converted last
				observation and rewards is the sum over the executed repeats.
			"""
			if not isinstance(action, collections.OrderedDict):
				action = self._mc_action(action)
			repeats = self._attack_repeat if action['attack'] == 1 else self._action_repeat
			if 'place' in action and action['place'] > 0:
				repeats = 1
			rewards = 0
			for _ in range(repeats):
				obs, reward, done, info = self._env.step(action)
				rewards += reward
				if done:
					break
			obs = self._vec_obs(obs)
			return obs, rewards, done, info

		def render(self):
			"""Rendering is not supported by this wrapper."""
			raise NotImplementedError("Minecraft's Rendering Not Implemented")