Created
January 11, 2020 18:32
-
-
Save nneubauer/fb1dbb4e95b01cb643bf0eb26226c2d2 to your computer and use it in GitHub Desktop.
TensorFlow Conv1 Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner
import pandas as pd
import numpy as np
import time
import array
class ForexEnvironment(Environment):
    """Toy Tensorforce environment that emits random "price" windows.

    Each state is a vector of ``data_points`` uniform random floats in
    [0, 1); every action earns a constant reward of 1.0, and episodes only
    end via the ``max_episode_timesteps`` limit enforced by Tensorforce.
    """

    def __init__(self):
        super().__init__()
        # Number of samples per state window (1440 = minutes in a day).
        self.data_points = 1440

    def states(self):
        # NOTE(review): the declared shape is (1, data_points), but reset()
        # and execute() actually return a 1-D (data_points,) array, since
        # transposing a 1-D numpy array is a no-op — confirm Tensorforce
        # accepts/reshapes this as intended.
        return dict(type='float', shape=(1, self.data_points))

    def actions(self):
        # Three discrete actions (presumably buy / hold / sell — verify).
        return dict(type='int', num_values=3)

    # Optional, should only be defined if environment has a natural maximum
    # episode length
    def max_episode_timesteps(self):
        return self.data_points

    def current_state_transposed(self):
        # Transpose of a 1-D array is a no-op; kept for interface parity.
        return self.current_state().transpose()

    def current_state(self):
        # Fixed: was hard-coded to 1440; use the configured window size so
        # changing data_points keeps the state shape consistent. The noisy
        # per-step debug print of the full array was removed.
        return np.random.uniform(size=(self.data_points,))

    def reset(self):
        # Start a new episode with a fresh random window.
        return self.current_state_transposed()

    def execute(self, actions):
        """Advance one timestep: fresh random state, constant reward 1.0.

        Never terminates on its own; relies on max_episode_timesteps.
        """
        terminal = False
        next_state = self.current_state_transposed()
        reward = 1.0
        return next_state, terminal, reward
def main():
    """Train a DQN agent on the toy ForexEnvironment and report progress.

    Runs 2000 rounds of 1000 episodes each, printing the average reward
    per timestep after every round. (With this environment the reward is
    always 1.0, so the printout mainly confirms the loop is running.)
    """
    environment = ForexEnvironment()
    agent = Agent.create(
        agent='dqn', environment=environment,
        seed=8,
        network=dict(type='layered', layers=[
            dict(type='conv1d', size=8),
            dict(type='conv1d_transpose', size=8),
            dict(type='flatten'),
            dict(type='linear', size=8),
        ])
    )
    # While we have time: repeat training rounds.
    for _ in range(2000):
        sum_of_sums = 0.0      # total reward accumulated this round
        training_steps = 1000  # episodes per round
        total_steps = 0        # timesteps across all episodes this round
        for _ in range(training_steps):
            # Removed dead code: start_time was computed each episode but
            # only consumed by a commented-out print.
            sum_rewards = 0.0
            states = environment.reset()
            terminal = False
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                sum_rewards += reward
                total_steps += 1
                agent.observe(terminal=terminal, reward=reward)
            sum_of_sums += sum_rewards
        print('Average reward per Timestep:', sum_of_sums / total_steps)
    # Close agent and environment
    agent.close()
    environment.close()
# Script entry point: run training only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment