@tik0
Last active February 28, 2020 09:30
PPO eval
#!/bin/bash
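# Evaluate the trained PPO2 policy on a flat world and on heightmaps of
# 0.05 m, 0.10 m, and 0.15 m. Each block installs a world file as
# parcour.world, runs the evaluation script (which writes its statistics to
# ppoTestRun.txt), appends that result to ppo_evaluate.txt, and kills the
# leftover Gazebo server.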
touch ppo_evaluate.txt
echo 'Flat: ' >> ppo_evaluate.txt
cp /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/empty_bullet.world /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/parcour.world
python3 run_evaluation_ppo2.py
cat ppoTestRun.txt >> ppo_evaluate.txt
pkill gzserver
echo '' >> ppo_evaluate.txt
echo '0.05: ' >> ppo_evaluate.txt
cp /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/heightmap1-0_05m.world /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/parcour.world
python3 run_evaluation_ppo2.py
cat ppoTestRun.txt >> ppo_evaluate.txt
pkill gzserver
echo '' >> ppo_evaluate.txt
echo '0.10: ' >> ppo_evaluate.txt
cp /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/heightmap1-0_10m.world /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/parcour.world
python3 run_evaluation_ppo2.py
cat ppoTestRun.txt >> ppo_evaluate.txt
pkill gzserver
echo '' >> ppo_evaluate.txt
echo '0.15: ' >> ppo_evaluate.txt
cp /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/heightmap1-0_15m.world /root/ros2learn/environments/gym-gazebo2/gym_gazebo2/worlds/parcour.world
python3 run_evaluation_ppo2.py
cat ppoTestRun.txt >> ppo_evaluate.txt
pkill gzserver
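
# --- Per-leg evaluation script ---
# Usage (inferred from the argv handling below): python3 <this_script>.py <output_file>
# Loads one pre-trained PPO2 checkpoint per PhantomX leg ('lf', 'lm', 'lr',
# 'rf', 'rm', 'rr'), runs 100 episodes per leg in parallel threads, and writes
# the variance/median/mean of the episode rewards to <output_file>.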
import os
import sys
import time
import gym
import gym_gazebo2
import numpy as np
import multiprocessing
import tensorflow as tf
import threading
import statistics
from importlib import import_module
from baselines import bench, logger
from baselines.ppo2 import model as ppo2
from baselines.common import set_global_seeds
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize
from baselines.common.policies import build_policy
ncpu = multiprocessing.cpu_count()
# The output filename comes in as the first argument; reset argv afterwards so
# later argument parsing is not confused by the extra entry.
filename = sys.argv[1]
sys.argv = [sys.argv[0]]
if sys.platform == 'darwin':
    ncpu //= 2
config = tf.ConfigProto(allow_soft_placement=True,
                        intra_op_parallelism_threads=ncpu,
                        inter_op_parallelism_threads=ncpu,
                        log_device_placement=False)
config.gpu_options.allow_growth = True
tf.Session(config=config).__enter__()
def get_alg_module(alg, submodule=None):
    submodule = submodule or alg
    try:
        # first try to import the alg module from baselines
        alg_module = import_module('.'.join(['baselines', alg, submodule]))
    except ImportError:
        # then from rl_algs
        alg_module = import_module('.'.join(['rl_' + 'algs', alg, submodule]))
    return alg_module

def get_learn_function_defaults(alg, env_type):
    try:
        alg_defaults = get_alg_module(alg, 'defaults')
        kwargs = getattr(alg_defaults, env_type)()
    except (ImportError, AttributeError):
        kwargs = {}
    return kwargs

def constfn(val):
    def f(_):
        return val
    return f

def make_env():
    env = gym.make(defaults['env_name'])
    env.set_episode_size(defaults['nsteps'])
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir()), allow_early_resets=True)
    return env
# Get dictionary from baselines/ppo2/defaults
defaults = get_learn_function_defaults('ppo2', 'phantomx_mlp')
env = gym.make('PhantomX-v0')
set_global_seeds(defaults['seed'])
alg_kwargs = {'num_layers': defaults['num_layers'], 'num_hidden': defaults['num_hidden']}
nenvs = 1
nbatch = nenvs * defaults['nsteps']
nbatch_train = nbatch // defaults['nminibatches']
legs = ['lf', 'lm', 'lr', 'rf', 'rm', 'rr']
models = {}
def runner(leg, env):
    # Each leg runs its own pre-trained policy in a separate thread.
    leg_env = gym.make('PhantomXLeg-v0')
    leg_env.set_info(env.info)
    leg_env.leg_name = leg
    policy = build_policy(leg_env, defaults['network'], **alg_kwargs)
    model = ppo2.Model(policy=policy, ob_space=leg_env.observation_space, ac_space=leg_env.action_space,
                       nbatch_act=nenvs, nbatch_train=nbatch_train,
                       nsteps=defaults['nsteps'], ent_coef=defaults['ent_coef'], vf_coef=defaults['vf_coef'],
                       max_grad_norm=defaults['max_grad_norm'])
    model.load('' + leg + '/checkpoints/05000')
    obs = leg_env.reset()
    ep_reward = 0
    rewards = []
    episode = 0
    step = 0
    while True:
        step += 1
        action, value_estimate, next_state, neglogp = model.step(obs)
        obs, reward, done, _ = leg_env.step(action[0])
        ep_reward += reward
        if done:
            leg_env.reset()
            episode += 1
            print(step)
            print(ep_reward)
            rewards.append(ep_reward)
            step = 0
            ep_reward = 0
            if episode >= 100:
                break
    # Write the per-leg statistics to the file given on the command line.
    f = open(filename, "w+")
    rewards = np.array(rewards, dtype=float)
    f.write("Variance: " + str(np.var(rewards)))
    f.write(",Median: " + str(statistics.median(rewards)))
    f.write(",Mean: " + str(np.mean(rewards)))
    f.close()
    # Keep the thread alive so the shared simulation loop below keeps running.
    while True:
        time.sleep(2)
        print("DONE")

# Start one evaluation thread per leg.
for leg in legs:
    models[leg] = threading.Thread(target=runner, args=(leg, env))
    models[leg].start()

# The main thread drives the shared Gazebo interface.
loop = True
while loop:
    env.info.execute_action()
    env.info.execute_reset()
    time.sleep(1/1000)
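
# --- Full-body evaluation script (presumably the run_evaluation_ppo2.py
# invoked by the shell script above) ---
# Evaluates the main PPO2 policy checkpoint './main/checkpoints/05000' for 100
# episodes on the currently installed parcour.world and writes the
# variance/median/mean of the episode rewards to ppoTestRun.txt.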
import os
import sys
import time
import gym
import gym_gazebo2
import numpy as np
import multiprocessing
import tensorflow as tf
import statistics
from importlib import import_module
from baselines import bench, logger
from baselines.ppo2 import model as ppo2
from baselines.common import set_global_seeds
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize
from baselines.common.policies import build_policy
ncpu = multiprocessing.cpu_count()
if sys.platform == 'darwin':
    ncpu //= 2
config = tf.ConfigProto(allow_soft_placement=True,
                        intra_op_parallelism_threads=ncpu,
                        inter_op_parallelism_threads=ncpu,
                        log_device_placement=False)
config.gpu_options.allow_growth = True
tf.Session(config=config).__enter__()

def get_alg_module(alg, submodule=None):
    submodule = submodule or alg
    try:
        # first try to import the alg module from baselines
        alg_module = import_module('.'.join(['baselines', alg, submodule]))
    except ImportError:
        # then from rl_algs
        alg_module = import_module('.'.join(['rl_' + 'algs', alg, submodule]))
    return alg_module

def get_learn_function_defaults(alg, env_type):
    try:
        alg_defaults = get_alg_module(alg, 'defaults')
        kwargs = getattr(alg_defaults, env_type)()
    except (ImportError, AttributeError):
        kwargs = {}
    return kwargs

def constfn(val):
    def f(_):
        return val
    return f

def make_env():
    env = gym.make(defaults['env_name'])
    env.set_episode_size(defaults['nsteps'])
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir()), allow_early_resets=True)
    return env
# Get dictionary from baselines/ppo2/defaults
defaults = get_learn_function_defaults('ppo2', 'phantomx_mlp')
env = DummyVecEnv([make_env])
set_global_seeds(defaults['seed'])
alg_kwargs = {'num_layers': defaults['num_layers'], 'num_hidden': defaults['num_hidden']}
policy = build_policy(env, defaults['network'], **alg_kwargs)
nenvs = env.num_envs
ob_space = env.observation_space
ac_space = env.action_space
nbatch = nenvs * defaults['nsteps']
nbatch_train = nbatch // defaults['nminibatches']
make_model = lambda: ppo2.Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nbatch_act=nenvs,
                                nbatch_train=nbatch_train,
                                nsteps=defaults['nsteps'], ent_coef=defaults['ent_coef'], vf_coef=defaults['vf_coef'],
                                max_grad_norm=defaults['max_grad_norm'])
model = make_model()
model.load('./main/checkpoints/05000')
obs = env.reset()
loop = True
step = 0
episode = 0
ep_reward = 0
rewards = []
f = open("ppoTestRun.txt", "w+")
while loop:
    step += 1
    actions = model.step(obs)[0]
    obs, reward, done, _ = env.step(actions)
    ep_reward += reward
    if done:
        print(episode)
        obs = env.reset()
        step = 0
        rewards.append(ep_reward)
        ep_reward = 0
        episode += 1
        # Stop after 100 evaluation episodes.
        if episode >= 100:
            break
f.write("Variance: " + str(np.var(rewards)))
rewards = np.array(rewards, dtype=float)
f.write(",Median: " + str(statistics.median(rewards)))
f.write(",Mean: " + str(np.mean(rewards)))