|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=Pendulum-v0 --outfile=$outdir/Pendulum-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=CartPole-v0 --outfile=$outdir/CartPole-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=MountainCar-v0 --outfile=$outdir/MountainCar-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=Acrobot-v0 --outfile=$outdir/Acrobot-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=InvertedPendulum-v0 --outfile=$outdir/InvertedPendulum-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=15000 --env=Reacher-v0 --outfile=$outdir/Reacher-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=15000 --env=InvertedDoublePendulum-v0 --outfile=$outdir/InvertedDoublePendulum-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=25000 --env=HalfCheetah-v0 --outfile=$outdir/HalfCheetah-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=25000 --env=Hopper-v0 --outfile=$outdir/Hopper-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=25000 --env=Swimmer-v0 --outfile=$outdir/Swimmer-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=25000 --env=Walker2d-v0 --outfile=$outdir/Walker2d-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=50000 --env=Ant-v0 --outfile=$outdir/Ant-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=50000 --env=Humanoid-v0 --outfile=$outdir/Humanoid-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=AirRaid-ram-v0 --outfile=$outdir/AirRaid-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Alien-ram-v0 --outfile=$outdir/Alien-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Amidar-ram-v0 --outfile=$outdir/Amidar-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Assault-ram-v0 --outfile=$outdir/Assault-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Asterix-ram-v0 --outfile=$outdir/Asterix-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Asteroids-ram-v0 --outfile=$outdir/Asteroids-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Atlantis-ram-v0 --outfile=$outdir/Atlantis-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=BankHeist-ram-v0 --outfile=$outdir/BankHeist-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=BattleZone-ram-v0 --outfile=$outdir/BattleZone-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=BeamRider-ram-v0 --outfile=$outdir/BeamRider-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Berzerk-ram-v0 --outfile=$outdir/Berzerk-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Bowling-ram-v0 --outfile=$outdir/Bowling-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Boxing-ram-v0 --outfile=$outdir/Boxing-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Breakout-ram-v0 --outfile=$outdir/Breakout-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Carnival-ram-v0 --outfile=$outdir/Carnival-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Centipede-ram-v0 --outfile=$outdir/Centipede-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=ChopperCommand-ram-v0 --outfile=$outdir/ChopperCommand-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=CrazyClimber-ram-v0 --outfile=$outdir/CrazyClimber-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=DemonAttack-ram-v0 --outfile=$outdir/DemonAttack-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=DoubleDunk-ram-v0 --outfile=$outdir/DoubleDunk-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=ElevatorAction-ram-v0 --outfile=$outdir/ElevatorAction-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Enduro-ram-v0 --outfile=$outdir/Enduro-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=FishingDerby-ram-v0 --outfile=$outdir/FishingDerby-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Freeway-ram-v0 --outfile=$outdir/Freeway-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Frostbite-ram-v0 --outfile=$outdir/Frostbite-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Gopher-ram-v0 --outfile=$outdir/Gopher-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Gravitar-ram-v0 --outfile=$outdir/Gravitar-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=IceHockey-ram-v0 --outfile=$outdir/IceHockey-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Jamesbond-ram-v0 --outfile=$outdir/Jamesbond-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=JourneyEscape-ram-v0 --outfile=$outdir/JourneyEscape-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Kangaroo-ram-v0 --outfile=$outdir/Kangaroo-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Krull-ram-v0 --outfile=$outdir/Krull-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=KungFuMaster-ram-v0 --outfile=$outdir/KungFuMaster-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=MontezumaRevenge-ram-v0 --outfile=$outdir/MontezumaRevenge-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=MsPacman-ram-v0 --outfile=$outdir/MsPacman-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=NameThisGame-ram-v0 --outfile=$outdir/NameThisGame-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Phoenix-ram-v0 --outfile=$outdir/Phoenix-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Pitfall-ram-v0 --outfile=$outdir/Pitfall-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Pong-ram-v0 --outfile=$outdir/Pong-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Pooyan-ram-v0 --outfile=$outdir/Pooyan-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=PrivateEye-ram-v0 --outfile=$outdir/PrivateEye-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Qbert-ram-v0 --outfile=$outdir/Qbert-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Riverraid-ram-v0 --outfile=$outdir/Riverraid-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=RoadRunner-ram-v0 --outfile=$outdir/RoadRunner-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Robotank-ram-v0 --outfile=$outdir/Robotank-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Seaquest-ram-v0 --outfile=$outdir/Seaquest-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Skiing-ram-v0 --outfile=$outdir/Skiing-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Solaris-ram-v0 --outfile=$outdir/Solaris-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=SpaceInvaders-ram-v0 --outfile=$outdir/SpaceInvaders-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=StarGunner-ram-v0 --outfile=$outdir/StarGunner-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Tennis-ram-v0 --outfile=$outdir/Tennis-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=TimePilot-ram-v0 --outfile=$outdir/TimePilot-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Tutankham-ram-v0 --outfile=$outdir/Tutankham-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=UpNDown-ram-v0 --outfile=$outdir/UpNDown-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Venture-ram-v0 --outfile=$outdir/Venture-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=VideoPinball-ram-v0 --outfile=$outdir/VideoPinball-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=WizardOfWor-ram-v0 --outfile=$outdir/WizardOfWor-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=YarsRevenge-ram-v0 --outfile=$outdir/YarsRevenge-ram-v0.h5" |
|
"python run_pg.py --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=40000 --env=Zaxxon-ram-v0 --outfile=$outdir/Zaxxon-ram-v0.h5" |
Would every single python line have to include `--video=0`? That sounds like something that should be controlled by a `VIDEO` environment variable! Also, any ideas what kind of error this might be? (I'm having my first go with the code.)
python ../run_pg.py --video=0 --gamma=0.995 --lam=0.97 --agent=modular_rl.agentzoo.TrpoAgent --max_kl=0.01 --cg_damping=0.1 --activation=tanh --n_iter=250 --seed=0 --timesteps_per_batch=5000 --env=Pendulum-v0 --outfile=$outdir/Pendulum-v0.h5
Using TensorFlow backend.
[2016-11-23 18:59:35,555] Making new env: Pendulum-v0
Traceback (most recent call last):
File "../run_pg.py", line 34, in <module>
agent = agent_ctor(env.observation_space, env.action_space, cfg)
File "/data/modular_rl/modular_rl/agentzoo.py", line 118, in __init__
policy, self.baseline = make_mlps(ob_space, ac_space, cfg)
File "/data/modular_rl/modular_rl/agentzoo.py", line 37, in make_mlps
Wlast.set_value(Wlast.get_value(borrow=True)*0.1)
AttributeError: 'Variable' object has no attribute 'set_value'
[2016-11-23 18:59:35,765] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/Pendulum-v0.h5.dir')