# Install Ray (see http://ray.readthedocs.io/en/latest/installation.html;
# for Ape-X support you'll want the latest version)
$ pip install -U ray[rllib]
# Go to the RLlib scripts directory
$ git clone git@github.com:ray-project/ray.git && cd ray/python/ray/rllib
# Run Ape-X in local mode with 4 workers (to use a GPU, add --resources='{"gpu": 1}')
$ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'
# To run on a cluster with 1 GPU + 32 workers, see the cluster setup
# instructions at http://ray.readthedocs.io/en/latest/autoscaling.html
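A plausible cluster invocation once a head node is up, hedged since train.py's flag names varied across Ray versions; <head-node-ip> is a placeholder for the address printed by `ray start --head`:

$ ./train.py --env=PongNoFrameskip-v4 --run=APEX \
    --redis-address=<head-node-ip>:6379 \
    --config='{"num_workers": 32}' --resources='{"gpu": 1}'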
"""Example of a custom gym environment. Run this for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import gym
from gym.spaces import Discrete, Box, Tuple
from gym.envs.registration import EnvSpec
import ray
@ray.remote
class MyEnvActor:
    def reset(self):
        return obs, 0, False, {}  # dummy vals for all but obs

    def step(self, action):
        ...
        return obs, rew, done, info
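A hypothetical driver loop for the actor-based env above; it assumes the elided env logic is filled in, and the constant action stands in for a real policy:

ray.init()
env = MyEnvActor.remote()
obs, _, done, _ = ray.get(env.reset.remote())
while not done:
    action = 0  # placeholder: a real policy would pick the action here
    obs, rew, done, info = ray.get(env.step.remote(action))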
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from ray.rllib.agents import with_common_config
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
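These imports are the header of a custom trainer definition; a minimal sketch of how they might be assembled, assuming the build_trainer API of this era (the trainer name and config value are hypothetical; the imported optimizers would typically be supplied via build_trainer's optimizer hook, whose signature varied across versions, so it is omitted here):

logger = logging.getLogger(__name__)

DEFAULT_CONFIG = with_common_config({
    # Hypothetical default layered on top of RLlib's common config.
    "train_batch_size": 4000,
})

# build_trainer wires a policy graph and a default config into a Trainer class.
CustomPPOTrainer = build_trainer(
    name="CustomPPO",
    default_config=DEFAULT_CONFIG,
    default_policy=PPOPolicyGraph,
)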
import tensorflow as tf
from ray.rllib.models.tf.tf_modelv2 import TFModelV2

class MaskingLayerRNNmodel(TFModelV2):
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        super(MaskingLayerRNNmodel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)
        self.initialize_lstm_with_prev_state = (
            model_config['custom_options']['initialize_lstm_with_prev_state'])
        self.input_layer = tf.keras.layers.Input(
            shape=(None, obs_space.shape[0]),
            name='inputLayer')
        # shape must be a tuple: (x) is just x, (x,) is a 1-tuple.
        self.state_in_c = tf.keras.layers.Input(
            shape=(model_config['lstm_cell_size'],),
            name='c')
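To use such a custom model it would be registered with the ModelCatalog and referenced from the trainer config; a sketch assuming the custom_options-era config keys (the registered name and values are illustrative):

from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("masking_rnn", MaskingLayerRNNmodel)

config = {
    "model": {
        "custom_model": "masking_rnn",
        "lstm_cell_size": 256,
        "custom_options": {"initialize_lstm_with_prev_state": True},
    },
}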
Idempotent actor API example.
1. Actors go from PENDING -> CREATED -> [RECONSTRUCTING -> CREATED] -> DEAD.
2. A single client may issue multiple updates; it is important that these
   updates are not re-ordered. This can be handled by making the actor
   update calls idempotent.
Non-idempotent:
def AsyncUpdate(actor_id, actor_state)
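An idempotent variant might attach a monotonically increasing version to each update, so replayed or re-ordered deliveries become harmless no-ops. A sketch; the version parameter and in-memory table are assumptions, not part of the original API:

actor_table = {}  # actor_id -> (version, actor_state); hypothetical store

def AsyncUpdate(actor_id, actor_state, version):
    """Apply the update only if it is newer than the stored one.

    Duplicated or out-of-order deliveries of an older version are
    silently dropped, so the call is safe to retry.
    """
    stored = actor_table.get(actor_id)
    if stored is None or version > stored[0]:
        actor_table[actor_id] = (version, actor_state)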
import time
import os
import random
import numpy as np
import ray
FAST = "DRY_RUN" in os.environ
if FAST:
import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
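These imports match the header of a Tune-driven RLlib training script; a plausible continuation, with the environment, sweep values, and stopping reward all hypothetical (the return shapes of try_import_tf/try_import_torch differed across Ray versions, so their results are not unpacked here):

try_import_tf()     # ensure TF is importable; return shape varies by version
try_import_torch()  # likewise for torch

parser = argparse.ArgumentParser()
parser.add_argument("--stop-reward", type=float, default=150.0)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()
    config = {
        "env": "CartPole-v0",             # hypothetical environment
        "num_workers": 1,
        "lr": grid_search([1e-2, 1e-4]),  # hypothetical sweep
    }
    results = tune.run("PPO", config=config,
                       stop={"episode_reward_mean": args.stop_reward})
    check_learning_achieved(results, args.stop_reward)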