vss-rsoccer-test.py
import math
import random
from typing import Dict

import gym
import numpy as np

from rsoccer_gym.Entities import Ball, Frame, Robot
from rsoccer_gym.Utils import KDTree
from rsoccer_gym.Utils.Utils import OrnsteinUhlenbeckAction
from rsoccer_gym.vss.vss_gym_base import VSSBaseEnv


class VSSEnvTest(VSSBaseEnv):
    """This environment controls a single robot in a VSS soccer League match

    Description:
    Observation:
        Type: Box(11)
        Normalized Bounds to [-1.25, 1.25]
        Num     Observation normalized
        0       Ball X
        1       Ball Y
        2       Ball Vx
        3       Ball Vy
        4       id 0 Blue Robot X
        5       id 0 Blue Robot Y
        6       id 0 Blue Robot sin(theta)
        7       id 0 Blue Robot cos(theta)
        8       id 0 Blue Robot Vx
        9       id 0 Blue Robot Vy
        10      id 0 Blue Robot v_theta
    Actions:
        Type: Box(2, )
        Num     Action
        0       id 0 Blue Left Wheel Speed  (%)
        1       id 0 Blue Right Wheel Speed (%)
    Reward:
        Sum of Rewards:
            Goal
            Ball Potential Gradient
            Move to Ball
            Energy Penalty
    Starting State:
        Randomized Robots and Ball initial Position
    Episode Termination:
        A goal is scored (no time limit is enforced when the class is
        instantiated directly)
    """
    def __init__(self):
        super().__init__(field_type=0, n_robots_blue=1, n_robots_yellow=0,
                         time_step=0.1)

        self.action_space = gym.spaces.Box(low=-1, high=1,
                                           shape=(2, ), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-self.NORM_BOUNDS,
                                                high=self.NORM_BOUNDS,
                                                shape=(11, ),
                                                dtype=np.float32)

        # Initialize class attributes
        self.previous_ball_potential = None
        self.actions: Dict = None
        self.reward_shaping_total = None
        self.v_wheel_deadzone = 0.05
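        # One Ornstein-Uhlenbeck noise generator per robot; in this
        # single-robot test they are only reset, never sampled (the full
        # VSSEnv uses them to drive the uncontrolled robots).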
        self.ou_actions = []
        for i in range(self.n_robots_blue + self.n_robots_yellow):
            self.ou_actions.append(
                OrnsteinUhlenbeckAction(self.action_space, dt=self.time_step)
            )

        print('Environment initialized')

    def reset(self):
        self.actions = None
        self.reward_shaping_total = None
        self.previous_ball_potential = None
        for ou in self.ou_actions:
            ou.reset()

        return super().reset()

    def step(self, action):
        observation, reward, done, _ = super().step(action)
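        # Expose the accumulated shaping terms as the info dict.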
        return observation, reward, done, self.reward_shaping_total

    def _frame_to_observations(self):
        observation = []

        observation.append(self.norm_pos(self.frame.ball.x))
        observation.append(self.norm_pos(self.frame.ball.y))
        observation.append(self.norm_v(self.frame.ball.v_x))
        observation.append(self.norm_v(self.frame.ball.v_y))

        for i in range(self.n_robots_blue):
            observation.append(self.norm_pos(self.frame.robots_blue[i].x))
            observation.append(self.norm_pos(self.frame.robots_blue[i].y))
            # theta is stored in degrees (see theta() below), so convert
            # before taking sin/cos
            observation.append(
                np.sin(np.deg2rad(self.frame.robots_blue[i].theta))
            )
            observation.append(
                np.cos(np.deg2rad(self.frame.robots_blue[i].theta))
            )
            observation.append(self.norm_v(self.frame.robots_blue[i].v_x))
            observation.append(self.norm_v(self.frame.robots_blue[i].v_y))
            observation.append(self.norm_w(self.frame.robots_blue[i].v_theta))

        return np.array(observation, dtype=np.float32)

    def _get_commands(self, actions):
        commands = []
        self.actions = {0: actions}
        v_wheel0, v_wheel1 = self._actions_to_v_wheels(actions)
        commands.append(Robot(yellow=False, id=0, v_wheel0=v_wheel0,
                              v_wheel1=v_wheel1))

        return commands

    def _calculate_reward_and_done(self):
        reward = 0
        goal = False
        w_move = 0.2
        w_ball_grad = 0.8
        w_energy = 2e-4
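        # The ball-potential gradient carries most of the shaping weight;
        # the energy term is a small regularizer on wheel speeds.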
        if self.reward_shaping_total is None:
            self.reward_shaping_total = {'goal_score': 0, 'move': 0,
                                         'ball_grad': 0, 'energy': 0,
                                         'goals_blue': 0, 'goals_yellow': 0}

        # Check if a goal occurred
        if self.frame.ball.x > (self.field.length / 2):
            self.reward_shaping_total['goal_score'] += 1
            self.reward_shaping_total['goals_blue'] += 1
            reward = 10
            goal = True
        elif self.frame.ball.x < -(self.field.length / 2):
            self.reward_shaping_total['goal_score'] -= 1
            self.reward_shaping_total['goals_yellow'] += 1
            reward = -10
            goal = True
        else:
            if self.last_frame is not None:
                # Calculate ball potential gradient
                grad_ball_potential = self.__ball_grad()
                # Calculate move-to-ball reward
                move_reward = self.__move_reward()
                # Calculate energy penalty
                energy_penalty = self.__energy_penalty()

                reward = w_move * move_reward + \
                    w_ball_grad * grad_ball_potential + \
                    w_energy * energy_penalty

                self.reward_shaping_total['move'] += w_move * move_reward
                self.reward_shaping_total['ball_grad'] += w_ball_grad \
                    * grad_ball_potential
                self.reward_shaping_total['energy'] += w_energy \
                    * energy_penalty

        return reward, goal

    def _get_initial_positions_frame(self):
        '''Returns the position of each robot and the ball for the initial frame'''
        field_half_length = self.field.length / 2
        field_half_width = self.field.width / 2

        def x(): return random.uniform(-field_half_length + 0.1,
                                       field_half_length - 0.1)

        def y(): return random.uniform(-field_half_width + 0.1,
                                       field_half_width - 0.1)

        def theta(): return random.uniform(0, 360)

        pos_frame: Frame = Frame()

        pos_frame.ball = Ball(x=x(), y=y())

        min_dist = 0.1
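        # Track occupied positions in a KDTree and rejection-sample each
        # robot placement until it is at least min_dist from everything
        # already placed.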
        places = KDTree()
        places.insert((pos_frame.ball.x, pos_frame.ball.y))
        for i in range(self.n_robots_blue):
            pos = (x(), y())
            while places.get_nearest(pos)[1] < min_dist:
                pos = (x(), y())

            places.insert(pos)
            pos_frame.robots_blue[i] = Robot(x=pos[0], y=pos[1], theta=theta())

        return pos_frame

    def _actions_to_v_wheels(self, actions):
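        '''Maps normalized actions in [-1, 1] to wheel speeds in rad/s,
        applying a small deadzone in linear speed.'''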
        left_wheel_speed = actions[0] * self.max_v
        right_wheel_speed = actions[1] * self.max_v

        left_wheel_speed, right_wheel_speed = np.clip(
            (left_wheel_speed, right_wheel_speed), -self.max_v, self.max_v
        )

        # Deadzone: treat near-zero commands as a full stop
        if -self.v_wheel_deadzone < left_wheel_speed < self.v_wheel_deadzone:
            left_wheel_speed = 0

        if -self.v_wheel_deadzone < right_wheel_speed < self.v_wheel_deadzone:
            right_wheel_speed = 0

        # Convert from linear speed (m/s) to angular speed (rad/s)
        left_wheel_speed /= self.field.rbt_wheel_radius
        right_wheel_speed /= self.field.rbt_wheel_radius

        return left_wheel_speed, right_wheel_speed

    def __ball_grad(self):
        '''Calculate ball potential gradient
        Difference of potential of the ball in time_step seconds.
        '''
        # Calculate ball potential
        length_cm = self.field.length * 100
        half_length = (self.field.length / 2.0)\
            + self.field.goal_depth

        # distance to defence
        dx_d = (half_length + self.frame.ball.x) * 100
        # distance to attack
        dx_a = (half_length - self.frame.ball.x) * 100
        dy = (self.frame.ball.y) * 100

        dist_1 = -math.sqrt(dx_a ** 2 + 2 * dy ** 2)
        dist_2 = math.sqrt(dx_d ** 2 + 2 * dy ** 2)
        ball_potential = ((dist_1 + dist_2) / length_cm - 1) / 2
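        # On the centerline (dy = 0) the potential is roughly -1 at the
        # defended goal and 0 at the attacked goal; lateral offset lowers it.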
        grad_ball_potential = 0
        # Calculate ball potential gradient
        # = actual_potential - previous_potential
        if self.previous_ball_potential is not None:
            diff = ball_potential - self.previous_ball_potential
            grad_ball_potential = np.clip(diff * 3 / self.time_step,
                                          -5.0, 5.0)

        self.previous_ball_potential = ball_potential

        return grad_ball_potential

    def __move_reward(self):
        '''Calculate move-to-ball reward
        Cosine between the robot velocity vector and the vector robot -> ball.
        This indicates whether the robot is moving towards the ball or not.
        '''
        ball = np.array([self.frame.ball.x, self.frame.ball.y])
        robot = np.array([self.frame.robots_blue[0].x,
                          self.frame.robots_blue[0].y])
        robot_vel = np.array([self.frame.robots_blue[0].v_x,
                              self.frame.robots_blue[0].v_y])
        robot_ball = ball - robot
        robot_ball = robot_ball / np.linalg.norm(robot_ball)

        move_reward = np.dot(robot_ball, robot_vel)
        move_reward = np.clip(move_reward / 0.4, -5.0, 5.0)
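        # move_reward is the robot's speed component toward the ball (m/s),
        # rescaled by 1/0.4 and clipped to [-5, 5].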
        return move_reward

    def __energy_penalty(self):
        '''Calculates the energy penalty'''
        en_penalty_1 = abs(self.sent_commands[0].v_wheel0)
        en_penalty_2 = abs(self.sent_commands[0].v_wheel1)
        energy_penalty = - (en_penalty_1 + en_penalty_2)
        return energy_penalty


env = VSSEnvTest()
env.reset()
# Run one episode with random actions, rendering each step, and print the
# final reward
for i in range(1):
    done = False
    while not done:
        # Step using random actions
        action = env.action_space.sample()
        print(action)
        next_state, reward, done, info = env.step(action)
        print(next_state, reward, done, info)
        env.render()
    print(reward)
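
# A possible next step: train a policy on this environment. A minimal sketch,
# assuming stable-baselines3 (a release compatible with the classic gym API)
# is installed; PPO and the timestep budget are illustrative choices:
#
#   from stable_baselines3 import PPO
#
#   model = PPO("MlpPolicy", VSSEnvTest(), verbose=1)
#   model.learn(total_timesteps=100_000)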