Skip to content

Instantly share code, notes, and snippets.

View rl13.py
N_ITER = 10
s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"
for n in range(N_ITER):
result = agent.train()
file_name = agent.save(CHECKPOINT_ROOT)
print(s.format(
n + 1,
result["episode_reward_min"],
View rl14.txt
_____________________________________________________________________________
Layer (type) Output Shape Param # Connected to
=============================================================================
observations (InputLayer) [(None, 16)] 0
_____________________________________________________________________________
fc_1 (Dense) (None, 256) 4352 observations[0][0] 
_____________________________________________________________________________
fc_value_1 (Dense) (None, 256) 4352 observations[0][0] 
_____________________________________________________________________________
fc_2 (Dense) (None, 256) 65792 fc_1[0][0] 
View rl15.sh
# Evaluate (roll out) the PPO policy checkpointed at training iteration 10
# on FrozenLake-v0 for 2000 environment steps.
# Fix: the flags were mangled to "- config" / "- run" / "- steps"; the
# rllib CLI takes double-dash options "--config", "--run", "--steps".
rllib rollout \
 tmp/ppo/froz/checkpoint_10/checkpoint-10 \
 --config "{\"env\": \"FrozenLake-v0\"}" \
 --run PPO \
 --steps 2000
View rl16.py
# Remove stale checkpoints and Ray result logs from previous runs so
# the upcoming CartPole training starts from a clean slate.
CHECKPOINT_ROOT = "tmp/ppo/cart"
shutil.rmtree(CHECKPOINT_ROOT, ignore_errors=True, onerror=None)
# Fix: os.getenv("HOME") is None when HOME is unset (e.g. Windows, some
# containers), which made the "+" concatenation raise TypeError;
# os.path.expanduser("~") resolves the home directory robustly and is
# identical whenever HOME is set.
ray_results = os.path.expanduser("~") + "/ray_results/"
shutil.rmtree(ray_results, ignore_errors=True, onerror=None)
View rl17.py
# Build a PPO trainer for the CartPole-v1 Gym environment.
SELECT_ENV = "CartPole-v1"
# Copy RLlib's default PPO hyperparameters so the shared module-level
# dict is not mutated by the tweaks below.
config = ppo.DEFAULT_CONFIG.copy()
config["log_level"] = "WARN"  # quieter console output than the default
agent = ppo.PPOTrainer(config, env=SELECT_ENV)
View rl18.py
N_ITER = 40
s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"
for n in range(N_ITER):
result = agent.train()
file_name = agent.save(CHECKPOINT_ROOT)
print(s.format(
n + 1,
result["episode_reward_min"],
View rl19.py
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
observations (InputLayer) [(None, 4)] 0
__________________________________________________________________________________________________
fc_1 (Dense) (None, 256) 1280 observations[0][0]
__________________________________________________________________________________________________
fc_value_1 (Dense) (None, 256) 1280 observations[0][0]
__________________________________________________________________________________________________
fc_2 (Dense) (None, 256) 65792 fc_1[0][0]
View rl20.sh
# Evaluate (roll out) the PPO policy checkpointed at training iteration 40
# on CartPole-v1 for 2000 environment steps.
# Fix: the flags were mangled to "- config" / "- run" / "- steps"; the
# rllib CLI takes double-dash options "--config", "--run", "--steps".
rllib rollout \
 tmp/ppo/cart/checkpoint_40/checkpoint-40 \
 --config "{\"env\": \"CartPole-v1\"}" \
 --run PPO \
 --steps 2000
View rl21.py
# Remove stale checkpoints and Ray result logs from previous runs so
# the upcoming MountainCar training starts from a clean slate.
CHECKPOINT_ROOT = "tmp/ppo/moun"
shutil.rmtree(CHECKPOINT_ROOT, ignore_errors=True, onerror=None)
# Fix: os.getenv("HOME") is None when HOME is unset (e.g. Windows, some
# containers), which made the "+" concatenation raise TypeError;
# os.path.expanduser("~") resolves the home directory robustly and is
# identical whenever HOME is set.
ray_results = os.path.expanduser("~") + "/ray_results/"
shutil.rmtree(ray_results, ignore_errors=True, onerror=None)
View rl22.py
# Configure PPO for MountainCar-v0, scaling up sampling and evaluation
# relative to RLlib's defaults (presumably because MountainCar's sparse
# reward needs more exploration — TODO confirm against the tutorial text).
SELECT_ENV = "MountainCar-v0"
# Copy the defaults so the shared module-level dict is not mutated.
config = ppo.DEFAULT_CONFIG.copy()
config["log_level"] = "WARN"  # quieter console output than the default
config["num_workers"] = 4 # default = 2
config["train_batch_size"] = 10000 # default = 4000
config["sgd_minibatch_size"] = 256 # default = 128
config["evaluation_num_episodes"] = 50 # default = 10