Comparison: TRPO on Hopper-v1 with OpenAI baselines (trpo_mpi) vs. TensorForce (TRPOAgent)
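Both scripts train TRPO on Hopper-v1 with a two-layer, 64-unit tanh policy and nominally matched settings. As a quick reference, here is my reading of how the parameter names line up between the two configs below (not a documented equivalence):

# Rough mapping of parameter names between the two scripts (my reading
# of the configs below, not a documented equivalence).
BASELINES_TO_TENSORFORCE = {
    "max_kl": "max_kl_divergence",           # 0.01 in both
    "cg_iters": "cg_iterations",             # 10 in both
    "cg_damping": "cg_damping",              # 0.1 in both
    "gamma": "discount",                     # 0.99 vs. 0.97 below
    "lam": "gae_lambda",                     # 0.97 vs. 0.99 below
    "vf_iters": "repeat_update (baseline)",  # 5 in both
    "vf_stepsize": "learning_rate",          # 1e-3 in both
}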
#!/usr/bin/env python
# noinspection PyUnresolvedReferences
import mujoco_py  # mujoco_py must be imported before the other imports. https://openai.slack.com/archives/C1H6P3R7B/p1492828680631850
from mpi4py import MPI
from baselines.common import set_global_seeds
import os
import gym
import logging
from baselines import logger
from baselines.pposgd.mlp_policy import MlpPolicy
from baselines import bench
from baselines.trpo_mpi import trpo_mpi
def train(env_id, num_timesteps, seed):
    import baselines.common.tf_util as U
    logger.session().__enter__()
    sess = U.single_threaded_session()
    sess.__enter__()

    # Only the rank-0 worker logs.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Give every MPI worker its own seed.
    workerseed = seed + 10000 * rank
    set_global_seeds(workerseed)

    env = gym.make(env_id)

    def policy_fn(name, ob_space, ac_space):
        # Two tanh hidden layers of 64 units, matching the TensorForce network below.
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         hid_size=64, num_hid_layers=2)

    env = bench.Monitor(env, os.path.join(logger.get_dir(), "%i.monitor.json" % rank))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    trpo_mpi.learn(env,
                   policy_fn,
                   timesteps_per_batch=25000,
                   max_kl=0.01,
                   cg_iters=10,
                   cg_damping=0.1,
                   max_timesteps=1e3 * num_timesteps,  # 1e9 total steps with num_timesteps=1e6
                   gamma=0.99,
                   lam=0.97,
                   vf_iters=5,
                   vf_stepsize=1e-3)
    env.close()
def main():
    # Run on the CPU only and cap the BLAS/OpenMP thread counts.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    os.environ['OMP_NUM_THREADS'] = '2'
    os.environ['MKL_NUM_THREADS'] = '2'
    train('Hopper-v1', num_timesteps=1e6, seed=0)


if __name__ == '__main__':
    main()
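For reference, a minimal sketch for summarizing the rewards that bench.Monitor writes above. It assumes the 2017-era JSON-lines monitor format (a '#'-prefixed metadata header, then one JSON record per episode with "r" = reward and "l" = length); adjust if your baselines version writes something else.

import json

def summarize_monitor(path="0.monitor.json"):
    rewards, lengths = [], []
    with open(path) as f:
        for line in f:
            if line.startswith('#'):  # metadata header written by bench.Monitor
                continue
            record = json.loads(line)
            rewards.append(record["r"])
            lengths.append(record["l"])
    if rewards:
        print("episodes:", len(rewards))
        print("mean episode reward:", sum(rewards) / len(rewards))
        print("mean episode length:", sum(lengths) / len(lengths))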
#!/usr/bin/env python
from __future__ import division, print_function

import os
import logging

from tensorforce.agents import TRPOAgent
from tensorforce.environments.openai_gym import OpenAIGym
from tensorforce.execution import Runner
from tensorforce.core.networks import layered_network_builder
from tensorforce import Configuration
def main():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    environment = OpenAIGym('Hopper-v1')

    # Same architecture as the baselines MlpPolicy: two tanh layers of 64 units.
    network_config = [{"type": "dense", "size": 64, "activation": "tanh"},
                      {"type": "dense", "size": 64, "activation": "tanh"}]
    network = layered_network_builder(network_config)
    print(environment.actions)

    agent_config = dict(states=environment.states,
                        actions=environment.actions,
                        network=network,
                        loglevel="info",
                        # An infinite batch size keeps the agent from updating on
                        # its own; the Callback below triggers updates manually.
                        batch_size=float('inf'),
                        discount=0.97,  # n.b. the baselines run uses gamma=0.99
                        generalized_advantage_estimation=True,
                        gae_lambda=0.99,  # n.b. the baselines run uses lam=0.97
                        baseline={"type": "mlp",
                                  "size": 64,
                                  "repeat_update": 5},
                        learning_rate=1e-3,
                        normalize_advantage=True,
                        override_line_search=False,
                        cg_damping=0.1,
                        line_search_steps=20,
                        max_kl_divergence=0.01,
                        cg_iterations=10)
    agent = TRPOAgent(config=Configuration(allow_defaults=True, **agent_config))

    runner = Runner(agent=agent, environment=environment)
    class Callback(object):
        """Mirrors timesteps_per_batch=25000 from the baselines script:
        once at least 25000 timesteps have accumulated, run one policy update."""

        def __init__(self):
            self.update_count = 0
            self.last_episode = 0

        def __call__(self, r):
            agent = r.agent
            if agent.batch_count >= 25000:
                logger.info('-' * 50)
                logger.info("Iteration {iteration}".format(iteration=self.update_count))
                episode_rewards = r.episode_rewards[self.last_episode:]
                logger.info("Average batch reward: {}".format(sum(episode_rewards) /
                                                              len(episode_rewards)))
                episode_lengths = r.episode_lengths[self.last_episode:]
                logger.info("Average episode length: {}".format(sum(episode_lengths) /
                                                                len(episode_lengths)))
                logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
                agent.model.update(agent.batch)
                agent.reset_batch()
                self.update_count += 1
                self.last_episode = r.episode
            return True
logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment)) | |
runner.run(episodes=float('inf'), max_timesteps=1e6, episode_finished=Callback()) | |
logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode)) | |
environment.close() | |
if __name__ == '__main__': | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
os.environ['OMP_NUM_THREADS'] = '2' | |
os.environ['MKL_NUM_THREADS'] = '2' | |
main() |
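To put the two runs side by side, something like the sketch below works once per-iteration average rewards have been pulled out of each script's log output. The two lists are placeholders to be filled in by hand or by a log parser.

import matplotlib.pyplot as plt

baselines_rewards = []    # one entry per 25000-timestep batch (trpo_mpi log)
tensorforce_rewards = []  # one entry per Callback update (tensorforce log)

plt.plot(baselines_rewards, label="baselines trpo_mpi")
plt.plot(tensorforce_rewards, label="tensorforce TRPOAgent")
plt.xlabel("update iteration (~25000 timesteps each)")
plt.ylabel("average batch reward")
plt.legend()
plt.show()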