# Source: GitHub Gist vwxyzjn/d906703cdd96f84808c40a40aa8fe64e
# Author: @vwxyzjn -- created November 24, 2017 00:57
import numpy as np
from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym
# Wrap an OpenAI Gym task for Tensorforce (rendering switched off).
# Environment ids noted by the author: ReversedAddition-v0, CartPole-v0.
env = OpenAIGym(
    'ReversedAddition-v0',
    visualize=False,
)
# Echo the wrapped Gym environment's observation and action spaces.
print(env.gym.observation_space)
print(env.gym.action_space)
# Layer-by-layer network description handed to the agent as `network_spec`:
# one embedding layer followed by two dense layers, each of size 32.
# NOTE(review): `indices=100` is presumably the number of distinct input
# indices the embedding accepts -- confirm against the Tensorforce docs.
network_spec = [
    {'type': 'embedding', 'size': 32, 'indices': 100},
    {'type': 'dense', 'size': 32},
    {'type': 'dense', 'size': 32},
]
# Build the PPO agent from the environment's state/action specs and the
# layer list above. The section labels below are the original author's
# grouping of the keyword arguments (presumably by the class in the
# Tensorforce hierarchy that consumes them -- confirm against the docs).
agent = PPOAgent(
    states_spec=env.states,
    actions_spec=env.actions,
    network_spec=network_spec,
    batch_size=4096,

    # --- Agent ---
    preprocessing=None,
    exploration=None,
    reward_preprocessing=None,

    # --- BatchAgent ---
    keep_last_timestep=True,

    # --- PPOAgent ---
    step_optimizer=dict(type='adam', learning_rate=1e-3),
    optimization_steps=10,

    # --- Model ---
    scope='ppo',
    discount=0.99,

    # --- DistributionModel ---
    distributions_spec=None,
    entropy_regularization=0.01,

    # --- PGModel ---
    baseline_mode=None,
    baseline=None,
    baseline_optimizer=None,
    gae_lambda=None,
    normalize_rewards=False,

    # --- PGLRModel ---
    likelihood_ratio_clipping=0.2,
    summary_spec=None,
    distributed_spec=None
)
# Pair the agent with the environment in a Runner, which is used below to
# execute the episodes.
runner = Runner(
    agent=agent,
    environment=env,
)
# Callback function printing episode statistics
def episode_finished(r):
    """Print a one-line summary of the episode that just ended.

    Intended to be passed as the ``episode_finished`` argument of
    ``runner.run``.

    Args:
        r: Object exposing ``episode``, ``episode_timestep`` and a non-empty
            ``episode_rewards`` sequence (presumably the ``Runner`` itself --
            confirm against the Tensorforce Runner documentation).

    Returns:
        Always ``True``. NOTE(review): presumably this tells the runner to
        keep going -- confirm against the Runner documentation.
    """
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode,
        ts=r.episode_timestep,
        # The most recent entry is the reward of the episode just finished.
        reward=r.episode_rewards[-1],
    ))
    return True
# Run training: 10 episodes, each capped at 200 timesteps, reporting after
# every episode through the `episode_finished` callback.
# NOTE(review): 10 episodes x 200 timesteps is at most 2000 timesteps, below
# the agent's batch_size=4096. If batch_size is counted in timesteps, no
# update may ever be performed in this run -- confirm and raise `episodes`.
runner.run(
    episodes=10,
    max_episode_timesteps=200,
    episode_finished=episode_finished,
)
# Final summary. The [-100:] slice takes every recorded episode when fewer
# than 100 were run.
print(
    "Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
        ep=runner.episode,
        ar=np.mean(runner.episode_rewards[-100:]),
    )
)
# (GitHub page footer: sign-up / sign-in prompt for commenting on the gist.)