DDPG for OpenAI Gym: trains a DDPG agent on the Pendulum-v0 environment using rllab.
from __future__ import print_function
from __future__ import absolute_import

import os

# Run Theano on the CPU with fast compilation; must be set before rllab
# (and hence Theano) is imported.
os.environ['THEANO_FLAGS'] = 'device=cpu,mode=FAST_COMPILE,optimizer=None'

from rllab.algos.ddpg import DDPG
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.misc.instrument import stub, run_experiment_lite
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction

# Stub out the constructor calls below so run_experiment_lite can serialize
# them and replay them in the experiment process.
stub(globals())
# Pendulum-v0 has a continuous action space, which DDPG requires;
# normalize() rescales actions to the environment's bounds.
env = normalize(GymEnv("Pendulum-v0"))

# Deterministic MLP actor, continuous Q-function critic, and
# Ornstein-Uhlenbeck exploration noise, as in the DDPG paper.
policy = DeterministicMLPPolicy(env.spec)
qf = ContinuousMLPQFunction(env.spec)
es = OUStrategy(env.spec)
algo = DDPG(
    env=env,
    policy=policy,
    qf=qf,
    es=es,
    n_epochs=10000,          # number of training epochs
    epoch_length=100,        # environment steps per epoch
    batch_size=64,           # minibatch size for critic/actor updates
    min_pool_size=500,       # steps collected before training starts
    replay_pool_size=10000,  # replay buffer capacity
    eval_samples=100,        # steps used for evaluation after each epoch
)
run_experiment_lite(
    algo.train(),
    # Number of parallel workers for sampling
    # n_parallel=1,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="last",
    # Seed for the experiment; if this is not provided, a random seed is used
    seed=1,
    plot=True,
)
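
Once training finishes, the snapshot written by run_experiment_lite can be reloaded with joblib and rolled out, in the spirit of rllab's scripts/sim_policy.py. Below is a minimal rollout sketch; the snapshot path is a placeholder (by default run_experiment_lite writes params.pkl under data/local/<exp_prefix>/<exp_name>/), so point it at the file your run actually produced.

import joblib

# Placeholder path: substitute the directory created by your run.
data = joblib.load("data/local/experiment/<exp_name>/params.pkl")
policy = data["policy"]
env = data["env"]

obs = env.reset()
total_reward = 0.0
for _ in range(200):
    # get_action returns (action, agent_info); DDPG's policy is deterministic.
    action, _ = policy.get_action(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        break
print("Episode return:", total_reward)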