Skip to content

Instantly share code, notes, and snippets.

@befelix
Created July 17, 2017 20:40
Show Gist options
  • Save befelix/4fb6e348761b2d8caa2b8d7bc7e9f0ce to your computer and use it in GitHub Desktop.
from tensorforce import Configuration
from tensorforce.agents import TRPOAgent
from tensorforce.core.networks import layered_network_builder
import numpy as np
# Minimal script: build a TRPOAgent with two continuous actions and drive it
# with random states and rewards for a fixed number of steps.

# Network specification: one dense layer of size 50 with ReLU activation.
network_config = [{"type": "dense", "size": 50, "activation": "relu"}]

# State specification: a float array of shape (2,).
states = dict(shape=(2,), type='float')

# Two separately named actions, each flagged as continuous.
actions = dict(
    action1=dict(continuous=True),
    action2=dict(continuous=True),
)

config = {
    'network': layered_network_builder(network_config),
    'states': states,
    'actions': actions,
    'batch_size': 10,
}
agent = TRPOAgent(Configuration(**config))

# Run experiment
for _ in range(100):
    # Random state matching the declared (2,) state shape.
    state = np.random.randn(2)
    action = agent.act(state=state)
    # The reward is random noise and the episode is never marked terminal.
    reward = np.random.randn()
    agent.observe(reward=reward, terminal=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment