Skip to content

Instantly share code, notes, and snippets.

@simoninithomas
Last active May 6, 2019 14:01
Show Gist options
  • Save simoninithomas/b966304a90e83409da753a3be96945b7 to your computer and use it in GitHub Desktop.
Save simoninithomas/b966304a90e83409da753a3be96945b7 to your computer and use it in GitHub Desktop.
import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
# Create the environment.
# The algorithms require a vectorized environment to run, so the single
# BipedalWalker env is wrapped in a DummyVecEnv. The factory builds the env
# itself rather than closing over a name that is rebound on the same line.
env = DummyVecEnv([lambda: gym.make('BipedalWalker-v2')])

# Define the model: PPO2 with an MLP policy; training metrics are written to
# the given TensorBoard log directory.
model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log="./ppo_bipedal_tensorboard/")

# Train the agent
model.learn(total_timesteps=25000)

# After training, watch our agent walk for 1000 steps.
obs = env.reset()
try:
    for _ in range(1000):
        # Only the action is needed; the returned policy state is discarded.
        action, _states = model.predict(obs)
        # Rewards, done flags and info are not used while just watching.
        obs, _rewards, _dones, _info = env.step(action)
        env.render()
finally:
    # Release the environment (and its render window) even if the loop is
    # interrupted, e.g. with Ctrl-C.
    env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment