TensorFlow Agents PyBullet Usage Example

This example shows how to install TensorFlow Agents and use it on custom environments, such as the environments that come with PyBullet.

The instructions work for both Python 3 and Python 2; for Python 2, replace pip3 and python3 with pip2 and python2 throughout.
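
As a quick sanity check that the Bullet environments are registered with Gym (importing pybullet_envs performs the registration, which the script below also relies on), you can instantiate the environment directly:

import gym
import pybullet_envs  # Importing registers the Bullet environments with Gym.

# Create the environment used in this example and inspect its spaces.
env = gym.make('AntBulletEnv-v0')
print('Observation space:', env.observation_space)
print('Action space:', env.action_space)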

Set up the dependencies:

sudo pip3 install -U tensorflow
sudo pip3 install -U gym
sudo pip3 install -U ruamel.yaml
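
To confirm the packages installed correctly, a quick import and version check helps (this assumes the usual __version__ attributes, which may vary by release; ruamel.yaml is imported only to verify it resolves):

import gym
import ruamel.yaml
import tensorflow as tf

# Print versions to verify the installs succeeded.
print('TensorFlow:', tf.__version__)
print('Gym:', gym.__version__)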

Install TensorFlow Agents from PyPI:

sudo pip3 install agents

Alternatively, if you want to hack on the code, clone the repository and install it in development mode:

git clone git@github.com:tensorflow/agents.git
cd agents
sudo python3 setup.py develop
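
Either way, you can check which copy of the package Python picks up (with the development install, the printed path should point into your clone):

import agents

# Shows where the imported package lives on disk.
print(agents.__file__)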

Run the script to train an agent:

python3 agents_example.py --mode train --logdir ~/experiments/agents --config pybullet_ant

Launch TensorBoard to visualize training:

tensorboard --logdir=~/experiments/agents
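
If you prefer to inspect the logged metrics programmatically rather than in TensorBoard, TensorFlow 1.x provides tf.train.summary_iterator; a minimal sketch, with the events-file path left as a placeholder:

import tensorflow as tf

# Iterate over the Event protos stored in a TensorBoard events file.
for event in tf.train.summary_iterator('<path-to-events-file>'):
    for value in event.summary.value:
        print(event.step, value.tag, value.simple_value)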

During or after training, use the script's render mode to record videos. Point --logdir at the run directory that training created (named <timestamp>-pybullet_ant inside the base log directory); the videos are written into that same directory:

python3 agents_example.py --mode render --logdir ~/experiments/agents/<timestamp>-pybullet_ant
"""Template for using TensorFlow agents."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import datetime
import logging
import os
import agents
import pybullet_envs # To make AntBulletEnv-v0 available.
import tensorflow as tf
def pybullet_ant():
# General
algorithm = agents.ppo.PPOAlgorithm
num_agents = 10
eval_episodes = 25
use_gpu = False
# Environment
env = 'AntBulletEnv-v0'
max_length = 1000
steps = 1e7 # 10M
# Network
network = agents.scripts.networks.feed_forward_gaussian
weight_summaries = dict(
all=r'.*',
policy=r'.*/policy/.*',
value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.1
init_logstd = -1
# Optimization
update_every = 30
update_epochs = 25
optimizer = tf.train.AdamOptimizer
learning_rate = 1e-4
# Losses
discount = 0.995
kl_target = 1e-2
kl_cutoff_factor = 2
kl_cutoff_coef = 1000
kl_init_penalty = 1
return locals()
def main(args):
agents.scripts.utility.set_up_logging()
logdir = args.logdir and os.path.expanduser(args.logdir)
if logdir:
logdir = os.path.join(logdir, '{}-{}'.format(args.timestamp, args.config))
if args.mode == 'train':
try:
# Try to resume training.
config = agents.scripts.utility.load_config(args.logdir)
except IOError:
# Start new training run.
config = agents.tools.AttrDict(globals()[args.config]())
config = agents.scripts.utility.save_config(config, logdir)
for score in agents.scripts.train.train(config, env_processes=True):
logging.info('Score {}.'.format(score))
if args.mode == 'render':
agents.scripts.visualize.visualize(
logdir=args.logdir, outdir=args.logdir, num_agents=1, num_episodes=5,
checkpoint=None, env_processes=True)
if __name__ == '__main__':
timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'render'], default='train')
parser.add_argument('--logdir', default='~/logdir/varagent')
parser.add_argument('--config')
parser.add_argument('--timestamp', default=timestamp)
args = parser.parse_args()
main(args)
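
Because main resolves the configuration function by name through globals(), you can define additional configs in the same file. A minimal sketch that reuses the Ant hyperparameters on another Bullet environment (pybullet_cheetah is a hypothetical name; HalfCheetahBulletEnv-v0 is another environment registered by pybullet_envs):

def pybullet_cheetah():
  # Hypothetical variant: copy the Ant hyperparameters and swap the environment.
  config = pybullet_ant()
  config['env'] = 'HalfCheetahBulletEnv-v0'
  return config

Train it with the same command as before, passing --config pybullet_cheetah.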