@YannBerthelot
Created April 28, 2020 08:57
Environment — a custom Tensorforce environment wrapping the FlightModel flight simulator: the agent picks discrete thrust and pitch (theta) actions, pays -1 per timestep, and earns a distance-based bonus once the plane climbs above the take-off threshold.
from .FlightModel import FlightModel
import numpy as np
from tensorforce.environments import Environment


class PlaneEnvironment(Environment):
    def __init__(self):
        super().__init__()
        self.FlightModel = FlightModel()
        self.NUM_ACTIONS = len(self.FlightModel.action_vec)
        self.NUM_THRUST = len(self.FlightModel.thrust_act_vec)
        self.NUM_THETA = len(self.FlightModel.theta_act_vec)
        self.max_step_per_episode = 1000
        self.finished = False
        self.episode_end = False
        self.STATES_SIZE = len(self.FlightModel.obs)

    def states(self):
        return dict(type="float", shape=(self.STATES_SIZE,))

    def actions(self):
        # Two independent discrete action components: thrust and pitch (theta).
        return {
            "thrust": dict(type="int", num_values=self.NUM_THRUST),
            "theta": dict(type="int", num_values=self.NUM_THETA),
        }

    # Optional: should only be defined if the environment has a natural
    # maximum episode length.
    def max_episode_timesteps(self):
        return self.max_step_per_episode

    # Optional
    def close(self):
        super().close()

    def reset(self):
        # Re-instantiate the flight model and return an all-zero initial
        # state (assumes the model itself starts from a zeroed observation).
        state = np.zeros(shape=(self.STATES_SIZE,))
        self.FlightModel = FlightModel()
        return state

    def execute(self, actions):
        next_state = self.FlightModel.compute_timestep(actions)
        terminal = self.terminal()
        reward = self.reward()
        return next_state, terminal, reward

    def terminal(self):
        # Success: altitude (Pos[1]) exceeds the take-off threshold of 25.
        self.finished = self.FlightModel.Pos[1] > 25
        # Abort: the episode ran too long, or horizontal position (Pos[0])
        # passed the 5000-unit mark without taking off.
        self.episode_end = (
            self.FlightModel.timestep > self.max_step_per_episode
        ) or (self.FlightModel.Pos[0] > 5000)
        return self.finished or self.episode_end

    def reward(self):
        if self.finished:
            # Reward successful take-off; grows the further the plane stops
            # short of the 5000 mark (note: -inf if Pos[0] == 5000 exactly).
            reward = np.log((5000 - self.FlightModel.Pos[0]) ** 2)
        else:
            # Constant per-step penalty encourages taking off quickly.
            reward = -1
        return reward
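
The gist imports FlightModel from a sibling module that is not shown here. From the calls above, the environment relies on at least the following interface; this is a hypothetical stub for orientation, not the actual implementation:

# Hypothetical stub of the FlightModel interface that PlaneEnvironment uses;
# the real implementation lives in FlightModel.py, not included in this gist.
class FlightModel:
    def __init__(self):
        self.action_vec = []      # action descriptor (only its length is used)
        self.thrust_act_vec = []  # discrete thrust settings
        self.theta_act_vec = []   # discrete pitch (theta) settings
        self.obs = []             # current observation vector
        self.Pos = [0.0, 0.0]     # [horizontal position, altitude], inferred from usage
        self.timestep = 0         # steps elapsed in the current episode

    def compute_timestep(self, actions):
        # Advance the physics by one step and return the next observation.
        ...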
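
For context, a minimal training-loop sketch showing how this environment could be driven with Tensorforce; the PPO agent and its batch_size are illustrative assumptions, not taken from the gist:

from tensorforce import Agent, Environment

# Create the environment; Tensorforce picks up the episode limit from the
# class's own max_episode_timesteps(), so no explicit limit is needed here.
environment = Environment.create(environment=PlaneEnvironment)

# Hypothetical agent choice; any Tensorforce agent handling dict actions works.
agent = Agent.create(agent="ppo", environment=environment, batch_size=10)

for _ in range(100):
    states = environment.reset()
    terminal = False
    while not terminal:
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)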