import math
from math import cos, sin, ceil, floor
import numpy as np
from numpy import arcsin
from numpy.linalg import norm
from .graph_utils import plot_duo, plot_multiple, plot_xy
class FlightModel:
    def __init__(self):
        ...  # constructor body omitted in this excerpt
class CustomEnvironment(Environment):
    def __init__(self):
        super().__init__()

    def states(self):
        return dict(type='float', shape=(8,))

    def actions(self):
        return dict(type='int', num_values=4)
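This is only the spec skeleton: a complete Tensorforce custom environment also needs reset() and execute(). A minimal runnable sketch for context (the class name MinimalEnvironment, the random states, and the zero reward are placeholders, not part of the plane model):

import numpy as np
from tensorforce.environments import Environment

class MinimalEnvironment(Environment):
    """Smallest complete Tensorforce environment: spec methods plus reset/execute."""

    def __init__(self):
        super().__init__()
        self.timestep = 0

    def states(self):
        return dict(type='float', shape=(8,))

    def actions(self):
        return dict(type='int', num_values=4)

    def reset(self):
        # Start each episode from a fresh (here: random placeholder) state.
        self.timestep = 0
        return np.random.random(size=(8,))

    def execute(self, actions):
        # Apply the action, then return (next_state, terminal, reward).
        self.timestep += 1
        next_state = np.random.random(size=(8,))
        terminal = self.timestep >= 100
        reward = 0.0  # placeholder reward
        return next_state, terminal, reward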
def states(self):
    return dict(type="float", shape=(4,))

def states(self):
    return dict(type="float", shape=(self.STATES_SIZE,))  # self.STATES_SIZE = 4

def actions(self):
    return {
        "thrust": dict(type="int", num_values=self.NUM_THRUST),  # self.NUM_THRUST = 6 (50, 60, 70, 80, 90, 100)
        "theta": dict(type="int", num_values=self.NUM_THETA),  # self.NUM_THETA = 16 (from 0 to 15°)
    }
@YannBerthelot
YannBerthelot / states_and_actions.py
Last active August 23, 2020 15:18
States and actions definition
def states(self):
    return dict(type="float", shape=(4,))

def actions(self):
    return {
        "thrust": dict(type="int", num_values=6),
        "theta": dict(type="int", num_values=16),
    }
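Tensorforce hands these actions back as integer indices, so the environment has to translate them into physical values. A possible decoding, assuming the levels noted above (thrust from 50% to 100% in steps of 10, theta from 0° to 15° in steps of 1°); the helper name decode_actions is illustrative:

def decode_actions(actions):
    # actions is a dict of integer indices, e.g. {"thrust": 3, "theta": 7}.
    thrust_levels = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]  # fraction of max thrust (6 values)
    thrust = thrust_levels[actions["thrust"]]
    theta_deg = actions["theta"]  # indices 0..15 map directly to 0..15 degrees
    return thrust, theta_deg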
@YannBerthelot
YannBerthelot / terminal_states.py
Last active August 23, 2020 15:20
Terminal states
def max_episode_timesteps(self):
    return 100

def terminal(self):
    self.finished = self.FlightModel.Pos[1] > 25  # The Agent succeeded
    self.episode_end = (self.FlightModel.timestep > self.max_step_per_episode) or (
        self.FlightModel.Pos[0] > 5000  # The Agent did not succeed (runs off the runway or takes too much time)
    )
    return self.finished or self.episode_end
@YannBerthelot
YannBerthelot / rewards.py
Last active August 23, 2020 15:29
Rewards
def reward(self):
    if self.finished:
        # If successful, reward based on how much runway is left (5000 being the length of the runway).
        reward = np.log((5000 - self.FlightModel.Pos[0]) ** 2)
    else:
        # Otherwise, if unsuccessful, give a small penalty at every timestep.
        reward = -1
    return reward
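For a sense of scale: since log(x²) = 2·log(x), the success bonus grows with the runway left at lift-off. A quick check with a few take-off positions:

import numpy as np

# Reward for a successful take-off at various positions along the 5000-unit runway:
for pos in (1000, 3000, 4900):
    print(pos, np.log((5000 - pos) ** 2))
# 1000 -> 2*ln(4000) ≈ 16.6
# 3000 -> 2*ln(2000) ≈ 15.2
# 4900 -> 2*ln(100)  ≈  9.2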
@YannBerthelot
YannBerthelot / execute.py
Last active August 23, 2020 15:34
execute
def execute(self, actions):
    next_state = self.FlightModel.compute_timestep(actions)  # defined in our AirplaneModel, see below
    terminal = self.terminal()  # defined earlier
    reward = self.reward()  # defined earlier
    return next_state, terminal, reward
import numpy as np
from tensorforce.environments import Environment

from .FlightModel import FlightModel

class PlaneEnvironment(Environment):
    def __init__(self):
        super().__init__()
        self.FlightModel = FlightModel()
        self.NUM_ACTIONS = len(self.FlightModel.action_vec)
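With the environment defined, it can be plugged into Tensorforce's standard training loop. A minimal sketch using the Environment.create / Agent.create / Runner API; the agent type and hyperparameters below are placeholders, not the ones used in this project:

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

# Wrap PlaneEnvironment so Tensorforce enforces the episode-length limit.
environment = Environment.create(
    environment=PlaneEnvironment, max_episode_timesteps=100
)

# Placeholder agent: any Tensorforce agent spec would work here.
agent = Agent.create(agent="ppo", environment=environment, batch_size=10)

# Run a few training episodes and clean up.
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=300)
runner.close()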