import math
from math import cos, sin, ceil, floor
import numpy as np
from numpy import arcsin
from numpy.linalg import norm
from .graph_utils import plot_duo, plot_multiple, plot_xy
class FlightModel:
    def __init__(self):
        ...  # constructor body omitted in this excerpt
class CustomEnvironment(Environment):
    def __init__(self):
        super().__init__()

    def states(self):
        return dict(type='float', shape=(8,))

    def actions(self):
        return dict(type='int', num_values=4)
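This is only the spec skeleton: a complete Tensorforce custom environment also needs reset() and execute(). A minimal runnable sketch for context (the class name MinimalEnvironment, the random states, and the zero reward are placeholders, not part of the plane model):

import numpy as np
from tensorforce.environments import Environment

class MinimalEnvironment(Environment):
    """Smallest complete Tensorforce environment: spec methods plus reset/execute."""

    def __init__(self):
        super().__init__()
        self.timestep = 0

    def states(self):
        return dict(type='float', shape=(8,))

    def actions(self):
        return dict(type='int', num_values=4)

    def reset(self):
        # Start each episode from a fresh (here: random placeholder) state.
        self.timestep = 0
        return np.random.random(size=(8,))

    def execute(self, actions):
        # Apply the action, then return (next_state, terminal, reward).
        self.timestep += 1
        next_state = np.random.random(size=(8,))
        terminal = self.timestep >= 100
        reward = 0.0  # placeholder reward
        return next_state, terminal, reward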
def states(self):
    return dict(type="float", shape=(4,))

def states(self):
    return dict(type="float", shape=(self.STATES_SIZE,))  # self.STATES_SIZE = 4

def actions(self):
    return {
        "thrust": dict(type="int", num_values=self.NUM_THRUST),  # self.NUM_THRUST = 6 (50, 60, 70, 80, 90, 100)
        "theta": dict(type="int", num_values=self.NUM_THETA),  # self.NUM_THETA = 16 (from 0 to 15°)
    }
@YannBerthelot
YannBerthelot / states_and_actions.py
Last active August 23, 2020 15:18
States and actions definition
def states(self):
    return dict(type="float", shape=(4,))

def actions(self):
    return {
        "thrust": dict(type="int", num_values=6),
        "theta": dict(type="int", num_values=16),
    }
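Tensorforce hands these actions back as integer indices, so the environment has to translate them into physical values. A possible decoding, assuming the levels noted above (thrust from 50% to 100% in steps of 10, theta from 0° to 15° in steps of 1°); the helper name decode_actions is illustrative:

def decode_actions(actions):
    # actions is a dict of integer indices, e.g. {"thrust": 3, "theta": 7}.
    thrust_levels = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]  # fraction of max thrust (6 values)
    thrust = thrust_levels[actions["thrust"]]
    theta_deg = actions["theta"]  # indices 0..15 map directly to 0..15 degrees
    return thrust, theta_deg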
@YannBerthelot
YannBerthelot / terminal_states.py
Last active August 23, 2020 15:20
Terminal states
def max_episode_timesteps(self):
    return 100

def terminal(self):
    self.finished = self.FlightModel.Pos[1] > 25  # The Agent succeeded
    self.episode_end = (self.FlightModel.timestep > self.max_step_per_episode) or (
        self.FlightModel.Pos[0] > 5000  # The Agent did not succeed (runs off the runway or takes too much time)
    )
    return self.finished or self.episode_end
@YannBerthelot
YannBerthelot / rewards.py
Last active August 23, 2020 15:29
Rewards
def reward(self):
    if self.finished:
        # If successful, reward based on how much runway is left (5000 being the length of the runway).
        reward = np.log((5000 - self.FlightModel.Pos[0]) ** 2)
    else:
        # Otherwise, if unsuccessful, give a small penalty at every timestep.
        reward = -1
    return reward
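For a sense of scale: since log(x²) = 2·log(x), the success bonus grows with the runway left at lift-off. A quick check with a few take-off positions:

import numpy as np

# Reward for a successful take-off at various positions along the 5000-unit runway:
for pos in (1000, 3000, 4900):
    print(pos, np.log((5000 - pos) ** 2))
# 1000 -> 2*ln(4000) ≈ 16.6
# 3000 -> 2*ln(2000) ≈ 15.2
# 4900 -> 2*ln(100)  ≈  9.2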
@YannBerthelot
YannBerthelot / execute.py
Last active August 23, 2020 15:34
execute
def execute(self, actions):
    next_state = self.FlightModel.compute_timestep(actions)  # defined in our AirplaneModel, see below
    terminal = self.terminal()  # defined earlier
    reward = self.reward()  # defined earlier
    return next_state, terminal, reward
import numpy as np
from tensorforce.environments import Environment

from .FlightModel import FlightModel

class PlaneEnvironment(Environment):
    def __init__(self):
        super().__init__()
        self.FlightModel = FlightModel()
        self.NUM_ACTIONS = len(self.FlightModel.action_vec)
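With the environment defined, it can be plugged into Tensorforce's standard training loop. A minimal sketch using the Environment.create / Agent.create / Runner API; the agent type and hyperparameters below are placeholders, not the ones used in this project:

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

# Wrap PlaneEnvironment so Tensorforce enforces the episode-length limit.
environment = Environment.create(
    environment=PlaneEnvironment, max_episode_timesteps=100
)

# Placeholder agent: any Tensorforce agent spec would work here.
agent = Agent.create(agent="ppo", environment=environment, batch_size=10)

# Run a few training episodes and clean up.
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=300)
runner.close()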