Tensorforce Conv1d Example
from tensorforce.agents import Agent
from tensorforce.environments import Environment

import numpy as np
import time

class ForexEnvironment(Environment):

    def __init__(self):
        super().__init__()
        self.data_points = 1440
        self.timestep = 0

    def states(self):
        return dict(type='float', shape=(1, self.data_points))

    def actions(self):
        return dict(type='int', num_values=3)

    # Optional, should only be defined if the environment has a natural
    # maximum episode length.
    def max_episode_timesteps(self):
        return self.data_points

    def current_state_transposed(self):
        return self.current_state().transpose()

    def current_state(self):
        # Placeholder observation with shape (data_points, 1), so that the
        # transpose matches the (1, data_points) spec declared in states().
        random_state = np.random.uniform(size=(self.data_points, 1))
        # print(random_state)
        return random_state

    def reset(self):
        self.timestep = 0
        return self.current_state_transposed()

    def execute(self, actions):
        # End the episode once the natural maximum length is reached;
        # otherwise terminal is never True and the act/observe loop in
        # main() would not finish an episode.
        self.timestep += 1
        terminal = self.timestep >= self.max_episode_timesteps()
        next_state = self.current_state_transposed()
        reward = 1.0
        return next_state, terminal, reward
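

# A minimal sanity check (a sketch, not part of the original training flow):
# confirm that reset() and execute() return observations whose shape matches
# the spec declared in states().
def check_state_shapes():
    env = ForexEnvironment()
    spec_shape = env.states()['shape']

    state = env.reset()
    assert state.shape == spec_shape, (state.shape, spec_shape)

    state, terminal, reward = env.execute(actions=0)
    assert state.shape == spec_shape, (state.shape, spec_shape)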


def main():
    environment = ForexEnvironment()

    agent = Agent.create(
        agent='dqn', environment=environment,
        seed=8,
        network=dict(type='layered', layers=[
            dict(type='conv1d', size=8),
            dict(type='conv1d_transpose', size=8),
            dict(type='flatten'),
            dict(type='linear', size=8)
        ])
    )
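
    # Note (an assumption about shape semantics, not verified against this
    # Tensorforce version): with a (1, 1440) state, conv1d convolves over a
    # sequence of length 1 with 1440 channels; to convolve over the 1440
    # data points instead, the state would need shape (1440, 1).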

    # While we have time: repeat training rounds.
    for _ in range(2000):
        # Train for some number of episodes per round.
        sum_of_sums = 0.0
        training_steps = 1000
        total_steps = 0
        for _ in range(training_steps):
            start_time = time.time()
            sum_rewards = 0.0
            states = environment.reset()
            terminal = False
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                sum_rewards += reward
                total_steps += 1
                agent.observe(terminal=terminal, reward=reward)
            # print("Training Episode Reward: %.2f taking %d seconds."
            #       % (sum_rewards, (time.time() - start_time)))
            sum_of_sums += sum_rewards
        print('Average reward per Timestep:', sum_of_sums / total_steps)

    # Close agent and environment.
    agent.close()
    environment.close()


if __name__ == '__main__':
    main()
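

# Alternative (a sketch, assuming Tensorforce's Runner utility behaves as in
# its 0.5.x examples): the manual act/observe loop above can be replaced by
#
#     from tensorforce.execution import Runner
#     runner = Runner(agent=agent, environment=environment)
#     runner.run(num_episodes=2000)
#     runner.close()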