# Install Ray (see http://ray.readthedocs.io/en/latest/installation.html;
# for Ape-X support you'll want the latest version)
$ pip install -U ray[rllib]
# Go to the RLlib scripts directory
$ git clone git@github.com:ray-project/ray.git && cd ray/python/ray/rllib
# Run Ape-X in local mode with 4 workers (to use a GPU, add --resources='{"gpu": 1}')
$ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'
# To run on a cluster with 1 GPU + 32 workers, see the cluster setup
# instructions at http://ray.readthedocs.io/en/latest/autoscaling.html
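A plausible cluster invocation once a head node is up, hedged since train.py's flag names varied across Ray versions; <head-node-ip> is a placeholder for the address printed by `ray start --head`:

$ ./train.py --env=PongNoFrameskip-v4 --run=APEX \
    --redis-address=<head-node-ip>:6379 \
    --config='{"num_workers": 32}' --resources='{"gpu": 1}'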
"""Example of a custom gym environment. Run this for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import gym
from gym.spaces import Discrete, Box, Tuple
from gym.envs.registration import EnvSpec
import ray
@ray.remote
class MyEnvActor:
    def reset(self):
        return obs, 0, False, {}  # dummy vals for all but obs

    def step(self, action):
        ...
        return obs, rew, done, info
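A hypothetical driver loop for the actor-based env above; it assumes the elided env logic is filled in, and the constant action stands in for a real policy:

ray.init()
env = MyEnvActor.remote()
obs, _, done, _ = ray.get(env.reset.remote())
while not done:
    action = 0  # placeholder: a real policy would pick the action here
    obs, rew, done, info = ray.get(env.step.remote(action))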
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from ray.rllib.agents import with_common_config
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
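These imports are the header of a custom trainer definition; a minimal sketch of how they might be assembled, assuming the build_trainer API of this era (the trainer name and config value are hypothetical; the imported optimizers would typically be supplied via build_trainer's optimizer hook, whose signature varied across versions, so it is omitted here):

logger = logging.getLogger(__name__)

DEFAULT_CONFIG = with_common_config({
    # Hypothetical default layered on top of RLlib's common config.
    "train_batch_size": 4000,
})

# build_trainer wires a policy graph and a default config into a Trainer class.
CustomPPOTrainer = build_trainer(
    name="CustomPPO",
    default_config=DEFAULT_CONFIG,
    default_policy=PPOPolicyGraph,
)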
import tensorflow as tf
from ray.rllib.models.tf.tf_modelv2 import TFModelV2

class MaskingLayerRNNmodel(TFModelV2):
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        super(MaskingLayerRNNmodel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)
        self.initialize_lstm_with_prev_state = (
            model_config['custom_options']['initialize_lstm_with_prev_state'])
        self.input_layer = tf.keras.layers.Input(
            shape=(None, obs_space.shape[0]),
            name='inputLayer')
        # shape must be a tuple: (x) is just x, (x,) is a 1-tuple.
        self.state_in_c = tf.keras.layers.Input(
            shape=(model_config['lstm_cell_size'],),
            name='c')
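To use such a custom model it would be registered with the ModelCatalog and referenced from the trainer config; a sketch assuming the custom_options-era config keys (the registered name and values are illustrative):

from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("masking_rnn", MaskingLayerRNNmodel)

config = {
    "model": {
        "custom_model": "masking_rnn",
        "lstm_cell_size": 256,
        "custom_options": {"initialize_lstm_with_prev_state": True},
    },
}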
Idempotent actor API example.
1. Actors go from PENDING -> CREATED -> [RECONSTRUCTING -> CREATED] -> DEAD.
2. A single client may issue multiple updates; it is important that these
   updates are not re-ordered. This can be handled by making the actor
   update calls idempotent.
Non-idempotent:
def AsyncUpdate(actor_id, actor_state)
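An idempotent variant might attach a monotonically increasing version to each update, so replayed or re-ordered deliveries become harmless no-ops. A sketch; the version parameter and in-memory table are assumptions, not part of the original API:

actor_table = {}  # actor_id -> (version, actor_state); hypothetical store

def AsyncUpdate(actor_id, actor_state, version):
    """Apply the update only if it is newer than the stored one.

    Duplicated or out-of-order deliveries of an older version are
    silently dropped, so the call is safe to retry.
    """
    stored = actor_table.get(actor_id)
    if stored is None or version > stored[0]:
        actor_table[actor_id] = (version, actor_state)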
import time
import os
import random
import numpy as np
import ray
FAST = "DRY_RUN" in os.environ
if FAST:
import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
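These imports match the header of a Tune-driven RLlib training script; a plausible continuation, with the environment, sweep values, and stopping reward all hypothetical (the return shapes of try_import_tf/try_import_torch differed across Ray versions, so their results are not unpacked here):

try_import_tf()     # ensure TF is importable; return shape varies by version
try_import_torch()  # likewise for torch

parser = argparse.ArgumentParser()
parser.add_argument("--stop-reward", type=float, default=150.0)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()
    config = {
        "env": "CartPole-v0",             # hypothetical environment
        "num_workers": 1,
        "lr": grid_search([1e-2, 1e-4]),  # hypothetical sweep
    }
    results = tune.run("PPO", config=config,
                       stop={"episode_reward_mean": args.stop_reward})
    check_learning_achieved(results, args.stop_reward)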