Skip to content

Instantly share code, notes, and snippets.

@alexlimh
Created April 27, 2020 19:19
Show Gist options
  • Save alexlimh/db9274827ab1c699f50c1d0d937a356b to your computer and use it in GitHub Desktop.
# Default hyperparameters for Atari environments.
# Inline "# value" comments preserved from the original note alternative settings.
ATARI_DEFAULTS = {
    # General
    'steps': 2e8,
    'eval_every': 1e5,
    'log_every': 1e4,
    'prefill': 5e4,
    'dataset_size': 4e6,
    'pretrain': 0,
    # Environment
    'time_limit': 108000,  # 30 minutes of game play.
    'action_repeat': 4,
    'eval_noise': 0.001,
    'train_every': 200,
    'train_steps': 10,
    'clip_rewards': 'tanh',
    # Model
    'pred_discount': True,
    'cnn_depth': 48,  # 32
    'stoch_size': 100,
    'deter_size': 600,
    # Behavior
    'actor_dist': 'onehot',
    'actor_entropy': 5e-3,
    'expl': 'epsilon_greedy',
    'expl_amount': 0.0,
    'expl_until': 5e7,
    'discount': 0.995,
    # Training
    'model_lr': 1e-4,  # 2e-4
    'actor_lr': 4e-5,  # 5e-5
    'value_lr': 8e-5,  # 1e-4
    'weight_decay': 0.0,  # 2e-5
    'kl_scale': 0.3,
    'free_nats': 0.0,
    'actor_grad_clip': 5.0,
    'value_grad_clip': 5.0,
    'oversample_ends': True,
    'slow_value_target': True,
    'slow_actor_target': True,
    'slow_target_update': 10,
    # Empowerment
}
# Default hyperparameters for MineRL environments.
# Inline "# value" comments preserved from the original note alternative settings.
MINERL_DEFAULTS = {
    # General
    'steps': 6e8,
    'eval_every': 1e5,
    'log_every': 1e4,
    'prefill': 1e4,
    'dataset_size': 4e6,
    'pretrain': 0,
    # Environment
    'time_limit': 8000,
    'train_every': 200,
    'train_steps': 10,
    # Model
    'pred_discount': True,
    'cnn_depth': 48,  # 32
    'stoch_size': 100,
    'deter_size': 600,
    'proprio': True,
    # Behavior
    'actor_dist': 'onehot',
    'expl': 'epsilon_greedy',
    'expl_amount': 0.0,
    'expl_until': 5e7,
    'discount': 0.99,
    # Training
    'model_lr': 1e-4,  # 2e-4
    'actor_lr': 5e-5,  # 5e-5
    'value_lr': 5e-5,  # 1e-4
    'weight_decay': 0.0,  # 2e-5
    'free_nats': 3.0,
    'actor_grad_clip': 100.0,
    'value_grad_clip': 100.0,
    'oversample_ends': True,
    'slow_value_target': True,
    'slow_actor_target': True,
    'slow_target_update': 10,
}
# Default hyperparameters for toy DeepMind Control Suite tasks.
DMC_TOY_DEFAULTS = {
    # General
    'steps': 5e6,
    'eval_every': 1e5,
    'log_every': 1e4,
    'prefill': 5e3,
    'dataset_size': 4e6,
    'pretrain': 0,
    # Environment
    'time_limit': 1000,
    'eval_noise': 0.001,
    'train_every': 200,
    'train_steps': 10,
    # Training
    'oversample_ends': True,
    'slow_value_target': True,
    'slow_actor_target': True,
    'slow_target_update': 10,
}
# Default hyperparameters for DeepMind Control Suite tasks.
DMC_DEFAULTS = {
    # General
    'steps': 5e6,
    'eval_every': 1e5,
    'log_every': 1e4,
    'prefill': 5e3,
    'dataset_size': 4e6,
    'pretrain': 0,
    # Training
    'slow_value_target': True,
    'slow_actor_target': True,
    'slow_target_update': 100,
}
# Registry mapping a suite name to its default hyperparameter dict.
default_configs = {
    'atari': ATARI_DEFAULTS,
    'mc': MINERL_DEFAULTS,
    'dmctoy': DMC_TOY_DEFAULTS,
    'dmc': DMC_DEFAULTS,
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment