Skip to content

Instantly share code, notes, and snippets.

@nagataka
Created April 21, 2020 22:15
Show Gist options
  • Save nagataka/35ad2ce46c3d9e5365b47b4e3dc6acb6 to your computer and use it in GitHub Desktop.
Initial example of using RLlib
import gym
import ray
from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG
import pprint as pp
# Equivalent one-liner using the tune API (kept for reference):
# tune.run(PPOTrainer, config={"env": "Breakout-v0", "use_pytorch": True})

# ignore_reinit_error allows re-running this script in the same interpreter
# (e.g. a notebook cell) without a second ray.init() raising.
ray.init(num_gpus=1, ignore_reinit_error=True, log_to_driver=False)

# Start from PPO's default hyperparameters, documented at:
# https://github.com/ray-project/ray/blob/master/rllib/agents/ppo/ppo.py#L15
config = DEFAULT_CONFIG.copy()
config['use_pytorch'] = True  # use the PyTorch implementation instead of TF
config['num_gpus'] = 1

# Train PPO on the Atari Breakout Gym environment.
agent = PPOTrainer(config, "Breakout-v0")

for i in range(10000):
    # One train() call runs a single training iteration and returns a
    # metrics dict; we only print the mean episode reward.
    result = agent.train()
    # pp.pprint(result)  # uncomment to inspect the full metrics dict
    print(result['episode_reward_mean'])
    # Checkpoint every 100 iterations so progress survives interruption.
    if i % 100 == 0:
        checkpoint = agent.save()
        print("checkpoint saved at", checkpoint)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment