Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Configuration for TensorFlow Agents PPO on MinitaurBulletEnv-v0
def minitaur_config():
# General
algorithm = ppo.PPOAlgorithm
num_agents = 10
eval_episodes = 30
use_gpu = False
# Environment
env = 'MinitaurBulletEnv-v0'
max_length = 1000
steps = 1e7 # 10M
# Network
network = networks.feed_forward_gaussian
weight_summaries = dict(
all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
policy_layers = 200, 100
value_layers = 200, 100
init_mean_factor = 0.3
init_logstd = -1
# Optimization
update_every = 30
policy_optimizer = 'AdamOptimizer'
value_optimizer = 'AdamOptimizer'
update_epochs_policy = 25
update_epochs_value = 25
policy_lr = 1e-4
value_lr = 3e-4
# Losses
discount = 0.995 # Important.
kl_target = 1e-2
kl_cutoff_factor = 2
kl_cutoff_coef = 1000
kl_init_penalty = 1
return locals()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.