Skip to content

Instantly share code, notes, and snippets.

@liketheflower
Created August 2, 2022 03:55
Show Gist options
  • Save liketheflower/ada5490ba482c9a6a764b7028e182c2a to your computer and use it in GitHub Desktop.
Save liketheflower/ada5490ba482c9a6a764b7028e182c2a to your computer and use it in GitHub Desktop.
import gym
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
import os
import time
# Train a PPO agent on MountainCarContinuous-v0, writing TensorBoard logs and
# saving a checkpoint after every training chunk.

# Take one timestamp for the whole run so the checkpoint directory and the
# TensorBoard log directory share the same suffix and can be matched up later.
run_stamp = time.time()
models_dir = f"models/Mountain-{run_stamp}"
logdir = f"logs/Mountain-{run_stamp}"
# exist_ok=True avoids the check-then-create race of a separate exists() test.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# A single (non-vectorized) environment; the vectorized alternative is kept
# below for reference.
env = gym.make("MountainCarContinuous-v0")
# env = make_vec_env("MountainCarContinuous-v0")

# The learning agent and its hyperparameters.
model = PPO(
    policy=MlpPolicy,
    env=env,
    seed=0,
    batch_size=256,
    ent_coef=0.00429,
    learning_rate=7.77e-05,
    n_epochs=10,
    n_steps=8,
    gae_lambda=0.9,
    gamma=0.9999,
    clip_range=0.1,
    max_grad_norm=5,
    vf_coef=0.19,
    use_sde=True,
    policy_kwargs=dict(log_std_init=-3.29, ortho_init=False),
    verbose=1,
    tensorboard_log=logdir,
)

# Train in fixed-size chunks and save a checkpoint after each one.
TIMESTEPS = 20000
for i in range(10):
    print(i)
    # reset_num_timesteps=False so successive learn() calls continue the same
    # run (and the same "PPO" TensorBoard log) instead of starting a new one.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO")
    # Name the checkpoint by the timesteps requested so far: the first save
    # happens after one full chunk, hence (i + 1) rather than i.
    model.save(f"{models_dir}/{TIMESTEPS * (i + 1)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment