import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
import time
import os
import random
import numpy as np
import ray
FAST = "DRY_RUN" in os.environ
if FAST:
    ...  # presumably scales the run down when DRY_RUN is set (body elided in the snippet)
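A DRY_RUN flag like this is commonly used to shrink an experiment into a quick smoke test; the sketch below shows one way the flag might be consumed (the stop criteria, environment, and worker counts are illustrative, not from the original snippet):

from ray import tune

# Illustrative: when DRY_RUN is set, run a single tiny iteration with no
# rollout workers so the script finishes in seconds; otherwise run the
# full experiment.
stop = {"training_iteration": 1 if FAST else 100}
config = {
    "env": "CartPole-v0",
    "num_workers": 0 if FAST else 4,
}

ray.init()
tune.run("PPO", stop=stop, config=config)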
Idempotent actor API example.
1. Actors go from PENDING -> CREATED -> [RECONSTRUCTING -> CREATED] -> DEAD.
2. A single client may issue multiple updates; it is important that these updates aren't re-ordered. This can be handled by making the actor update calls idempotent.
Non-idempotent (retrying or duplicating the call changes the result):
def AsyncUpdate(actor_id, actor_state)
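A minimal sketch of the difference, using hypothetical names (`async_update`, `seq_update`, `update_id`) that are not from the original note: the idempotent variant tags each update with a client-assigned sequence number and sends the full new state, so a retried or duplicated call has no additional effect.

import ray


@ray.remote
class StatefulActor:
    def __init__(self):
        self.state = {}
        self.last_applied = {}  # client_id -> highest update_id applied

    # Non-idempotent: applying the same delta twice corrupts the state.
    def async_update(self, client_id, delta):
        self.state[client_id] = self.state.get(client_id, 0) + delta
        return self.state[client_id]

    # Idempotent: the (client_id, update_id) pair makes retries safe, and
    # stale or duplicated updates are ignored rather than re-applied.
    def seq_update(self, client_id, update_id, new_state):
        if update_id <= self.last_applied.get(client_id, -1):
            return self.state[client_id]  # already applied; no-op
        self.last_applied[client_id] = update_id
        self.state[client_id] = new_state
        return new_state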
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.framework import try_import_tf

tf = try_import_tf()


class MaskingLayerRNNmodel(TFModelV2):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw):
        super(MaskingLayerRNNmodel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)
        self.initialize_lstm_with_prev_state = \
            model_config['custom_options']['initialize_lstm_with_prev_state']
        # Observation input: (batch, time, obs_dim).
        self.input_layer = tf.keras.layers.Input(
            shape=(None, obs_space.shape[0]), name='inputLayer')
        # Initial LSTM cell state; the shape is passed as a 1-tuple.
        self.state_in_c = tf.keras.layers.Input(
            shape=(model_config['lstm_cell_size'],), name='c')
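To use a custom model like this, it would typically be registered with RLlib's ModelCatalog and referenced by name in the trainer config. A minimal sketch follows; the registered name, environment, and cell size are illustrative, while the `custom_options` key mirrors the one read in the constructor above.

from ray import tune
from ray.rllib.models import ModelCatalog

# Register the custom model under a name RLlib can look up.
ModelCatalog.register_custom_model("masking_rnn", MaskingLayerRNNmodel)

tune.run(
    "PPO",
    config={
        "env": "CartPole-v0",
        "model": {
            "custom_model": "masking_rnn",
            "lstm_cell_size": 256,
            "custom_options": {"initialize_lstm_with_prev_state": True},
        },
    },
)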
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from ray.rllib.agents import with_common_config
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
from ray.rllib.optimizers import SyncSamplesOptimizer, LocalMultiGPUOptimizer
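These imports set up a trainer assembled from RLlib's trainer template. A minimal sketch of how they might be combined is below, assuming this RLlib version's `build_trainer` accepts `name`, `default_config`, and `default_policy` keyword arguments; the config values are illustrative.

logger = logging.getLogger(__name__)

# Start from the common trainer options; values here are illustrative.
DEFAULT_CONFIG = with_common_config({
    "num_sgd_iter": 10,
    "train_batch_size": 4000,
})

# Assemble a trainer class around the PPO policy graph. Without a
# make_policy_optimizer hook, the trainer template should fall back to a
# plain synchronous sampling optimizer, which is why SyncSamplesOptimizer
# is imported above.
CustomPPOTrainer = build_trainer(
    name="CustomPPO",
    default_config=DEFAULT_CONFIG,
    default_policy=PPOPolicyGraph)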
@ray.remote
class MyEnvActor:
    def reset(self):
        return obs, 0, False, {}  # dummy vals for all but obs

    def step(self, action):
        ...
        return obs, rew, done, info
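One way to use such an actor is to drive it from an ordinary gym-style loop on the client side, blocking on each remote call. A minimal sketch with a placeholder policy is below; it assumes the actor's internals produce valid obs/rew/done values.

import ray

ray.init()

env = MyEnvActor.remote()
obs, _, done, _ = ray.get(env.reset.remote())

episode_reward = 0.0
while not done:
    action = 0  # placeholder policy; a real agent would compute this from obs
    obs, rew, done, info = ray.get(env.step.remote(action))
    episode_reward += rew
print("episode reward:", episode_reward)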
"""Example of a custom gym environment. Run this for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import gym
from gym.spaces import Discrete, Box, Tuple
from gym.envs.registration import EnvSpec
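A minimal custom environment in this style might look like the corridor example used in the RLlib docs; the sketch below is illustrative (the `SimpleCorridor` name and reward values are not taken from the original file):

class SimpleCorridor(gym.Env):
    """Corridor in which the agent must learn to walk right to reach the exit."""

    def __init__(self, config=None):
        config = config or {}
        self.end_pos = config.get("corridor_length", 5)
        self.cur_pos = 0
        self.action_space = Discrete(2)  # 0 = left, 1 = right
        self.observation_space = Box(0.0, self.end_pos, shape=(1,), dtype=np.float32)
        self._spec = EnvSpec("SimpleCorridor-v0")

    def reset(self):
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        assert action in (0, 1), action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        return [self.cur_pos], 1.0 if done else 0.0, done, {}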
First install Ray: http://ray.readthedocs.io/en/latest/installation.html
For Ape-X support, you'll want to install the latest version.
Run Ape-X in local mode with 4 workers (to use a GPU, add --resources='{"gpu": 1}'):
$ cd ray/python/ray/rllib
$ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'
To run on a cluster with 1 GPU + 32 workers, see the cluster setup instructions at http://ray.readthedocs.io/en/latest/autoscaling.html
# Install Ray
$ pip install -U ray[rllib]
# Go to the rllib scripts directory
$ git clone git@github.com:ray-project/ray.git && cd ray/python/ray/rllib
# Run in local mode with reduced num workers (to use a GPU, add --resources='{"gpu": 1}')
$ ./train.py --env=PongNoFrameskip-v4 --run=APEX --config='{"num_workers": 4, "timesteps_per_iteration": 5000}'
# Run on a cluster with 1 GPU + 32 workers