DDPG for OpenAI Gym: trains a DDPG agent on the Pendulum-v0 environment using rllab.
from __future__ import print_function
from __future__ import absolute_import

import os

# Run Theano on the CPU with fast compilation; must be set before rllab
# (and hence Theano) is imported.
os.environ['THEANO_FLAGS'] = 'device=cpu,mode=FAST_COMPILE,optimizer=None'

from rllab.algos.ddpg import DDPG
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize
from rllab.exploration_strategies.ou_strategy import OUStrategy
from rllab.misc.instrument import stub, run_experiment_lite
from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction

# Stub out the constructor calls below so run_experiment_lite can serialize
# them and replay them in the experiment process.
stub(globals())
# Pendulum-v0 has a continuous action space, which DDPG requires;
# normalize() rescales actions to the environment's bounds.
env = normalize(GymEnv("Pendulum-v0"))

# Deterministic MLP actor, continuous Q-function critic, and
# Ornstein-Uhlenbeck exploration noise, as in the DDPG paper.
policy = DeterministicMLPPolicy(env.spec)
qf = ContinuousMLPQFunction(env.spec)
es = OUStrategy(env.spec)
algo = DDPG(
    env=env,
    policy=policy,
    qf=qf,
    es=es,
    n_epochs=10000,          # number of training epochs
    epoch_length=100,        # environment steps per epoch
    batch_size=64,           # minibatch size for critic/actor updates
    min_pool_size=500,       # steps collected before training starts
    replay_pool_size=10000,  # replay buffer capacity
    eval_samples=100,        # steps used for evaluation after each epoch
)
run_experiment_lite(
    algo.train(),
    # Number of parallel workers for sampling
    # n_parallel=1,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="last",
    # Seed for the experiment; if this is not provided, a random seed is used
    seed=1,
    plot=True,
)
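
Once training finishes, the snapshot written by run_experiment_lite can be reloaded with joblib and rolled out, in the spirit of rllab's scripts/sim_policy.py. Below is a minimal rollout sketch; the snapshot path is a placeholder (by default run_experiment_lite writes params.pkl under data/local/<exp_prefix>/<exp_name>/), so point it at the file your run actually produced.

import joblib

# Placeholder path: substitute the directory created by your run.
data = joblib.load("data/local/experiment/<exp_name>/params.pkl")
policy = data["policy"]
env = data["env"]

obs = env.reset()
total_reward = 0.0
for _ in range(200):
    # get_action returns (action, agent_info); DDPG's policy is deterministic.
    action, _ = policy.get_action(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        break
print("Episode return:", total_reward)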