Created
December 8, 2022 12:59
-
-
Save israelio/98f27bddddec27bc928aa5467149a3c2 to your computer and use it in GitHub Desktop.
Reinforcement learning ev3 - final
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import random | |
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent | |
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4 | |
from ev3dev2.sensor.lego import UltrasonicSensor | |
from ev3dev2.sound import Sound | |
# Motor power passed to SpeedPercent() and the duration, in seconds, of a
# single movement step.
SPEED = 20
DRIVE_TIME = 1

# Action indices. Each value is also the position of that action in the
# per-action `rewards` and `action_counts` lists kept by main().
DO_NOTHING = 0
STEP_BACKWARD = 1
STEP_FORWARD = 2
STEP_RIGHT = 3
STEP_LEFT = 4
def step_forward(tank):
    """Run both motors forward at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_backward(tank):
    """Run both motors in reverse at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def step_left(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn left.

    The first motor runs in reverse and the second forward, both at SPEED
    percent. NOTE(review): this turns the robot left only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_right(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn right.

    The first motor runs forward and the second in reverse, both at SPEED
    percent. NOTE(review): this turns the robot right only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def do_nothing():
    """Perform the "no action" step by idling for two seconds."""
    time.sleep(2)
def calculate_reward(prev_distance, curr_distance, min_distance=70,
                     max_distance=220):
    """Score the last action from the change in the measured distance.

    Args:
        prev_distance: Distance reading taken before the action (main()
            passes the ultrasonic sensor's centimeter reading).
        curr_distance: Distance reading taken after the action.
        min_distance: Readings at or below this value are penalised.
        max_distance: Readings above this value are penalised.

    Returns:
        -50 when ``curr_distance`` is outside the allowed band,
        1 when the robot got strictly closer than ``prev_distance``,
        -5 otherwise (moved away or did not move).
    """
    if curr_distance <= min_distance or curr_distance > max_distance:
        return -50
    if curr_distance < prev_distance:
        return 1
    # Moving away and standing still carry the same penalty.
    return -5
def calculate_action_values(rewards, action_counts):
    """Return the average reward earned by each action so far.

    Args:
        rewards: Accumulated reward per action.
        action_counts: Number of times each action was taken; paired with
            ``rewards`` by position, so both lists are expected to have the
            same length.

    Returns:
        A new list with ``rewards[i] / action_counts[i]`` for every action,
        or 0 for an action that has never been taken.
    """
    return [
        0 if count == 0 else total / count
        for total, count in zip(rewards, action_counts)
    ]
def main():
    """Run an endless epsilon-greedy loop that learns which move pays off.

    Each cycle picks one of the five actions, performs it, reads the
    ultrasonic distance, turns the change in distance into a reward via
    calculate_reward() and adds it to that action's running totals. With
    probability ``epsilon`` the action is random (explore); otherwise the
    action with the highest average reward so far is taken (exploit).

    The loop never returns on its own. The motors are switched off when it
    is left through an exception such as KeyboardInterrupt.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    ultrasonic = UltrasonicSensor(INPUT_2)

    # Action index -> callable that performs the action.
    perform = {
        DO_NOTHING: do_nothing,
        STEP_BACKWARD: lambda: step_backward(tank_drive),
        STEP_FORWARD: lambda: step_forward(tank_drive),
        STEP_RIGHT: lambda: step_right(tank_drive),
        STEP_LEFT: lambda: step_left(tank_drive),
    }

    cycle = 0
    # One slot per action, in the order: NC, B, F, R, L.
    rewards = [0, 0, 0, 0, 0]
    action_counts = [0, 0, 0, 0, 0]
    # Fraction of cycles spent exploring (trying a random action to learn
    # about the environment) instead of exploiting the best known action.
    epsilon = 0.05
    prev_distance = ultrasonic.distance_centimeters

    try:
        while True:
            cycle += 1
            print('-----[ ' + str(cycle) + ' ]-----')

            # Draw a number in [0, 1) to choose between explore and exploit.
            rand = random.random()
            print('random: ' + str(rand))
            if rand < epsilon:
                # Explore: any action index, both bounds inclusive.
                action = random.randint(0, len(rewards) - 1)
            else:
                # Exploit: index() returns the first maximum, so ties go to
                # the lowest action index.
                action_values = calculate_action_values(rewards, action_counts)
                max_ac_value = max(action_values)
                print('max action value: ' + str(max_ac_value))
                action = action_values.index(max_ac_value)
            print('action selected index: ' + str(action))

            perform[action]()
            print('Action: ' + str(action))

            curr_distance = ultrasonic.distance_centimeters
            print('Distance: ' + str(curr_distance))

            reward = calculate_reward(prev_distance, curr_distance)
            rewards[action] += reward
            action_counts[action] += 1
            prev_distance = curr_distance

            print('NC, B, F, R, L')
            print('rewards:')
            print(*rewards)
            print('action counts:')
            print(*action_counts)
            time.sleep(0.5)
    finally:
        # Make sure an interrupted step does not leave the motors running.
        tank_drive.off()
# Start the learning loop only when the file is executed as a script.
if __name__ == '__main__':
    print('starting !')
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import random | |
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent | |
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4 | |
from ev3dev2.sensor.lego import UltrasonicSensor | |
from ev3dev2.sound import Sound | |
# Motor power passed to SpeedPercent() and the duration, in seconds, of a
# single movement step.
SPEED = 20
DRIVE_TIME = 1
def step_forward(tank):
    """Run both motors forward at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``, e.g. the ``MoveTank`` created in main().
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_backward(tank):
    """Run both motors in reverse at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``, e.g. the ``MoveTank`` created in main().
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def step_left(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn left.

    The first motor runs in reverse and the second forward, both at SPEED
    percent. NOTE(review): this turns the robot left only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``, e.g. the ``MoveTank`` created in main().
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_right(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn right.

    The first motor runs forward and the second in reverse, both at SPEED
    percent. NOTE(review): this turns the robot right only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``, e.g. the ``MoveTank`` created in main().
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def do_nothing():
    """Perform the "no action" step by idling for two seconds."""
    time.sleep(2)
def main():
    """Smoke-test the robot: set up the tank drive and play a beep.

    The movement calls below are left commented out on purpose; uncomment
    one at a time to check that each step function moves the robot as
    expected.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    sound = Sound()
    sound.beep()
    # do_nothing()
    # step_backward(tank_drive)
    # step_forward(tank_drive)
    # step_right(tank_drive)
    # step_left(tank_drive)
# Run the smoke test only when the file is executed as a script.
if __name__ == '__main__':
    print('starting !')
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import random | |
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent | |
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4 | |
from ev3dev2.sensor.lego import UltrasonicSensor | |
from ev3dev2.sound import Sound | |
# Motor power passed to SpeedPercent() and the duration, in seconds, of a
# single movement step.
SPEED = 20
DRIVE_TIME = 1

# Action indices. Each value is also the position of that action in the
# per-action `rewards` and `action_counts` lists kept by main().
DO_NOTHING = 0
STEP_BACKWARD = 1
STEP_FORWARD = 2
STEP_RIGHT = 3
STEP_LEFT = 4
def step_forward(tank):
    """Run both motors forward at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_backward(tank):
    """Run both motors in reverse at SPEED percent for DRIVE_TIME seconds.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def step_left(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn left.

    The first motor runs in reverse and the second forward, both at SPEED
    percent. NOTE(review): this turns the robot left only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(-SPEED), SpeedPercent(SPEED), DRIVE_TIME)
def step_right(tank):
    """Counter-rotate the motors for DRIVE_TIME seconds to turn right.

    The first motor runs forward and the second in reverse, both at SPEED
    percent. NOTE(review): this turns the robot right only if the first
    output of the tank is the left wheel -- confirm against the wiring.

    Args:
        tank: Drive object exposing ``on_for_seconds(speed_a, speed_b,
            seconds)``; main() passes a ``MoveTank``.
    """
    tank.on_for_seconds(SpeedPercent(SPEED), SpeedPercent(-SPEED), DRIVE_TIME)
def do_nothing():
    """Perform the "no action" step by idling for two seconds."""
    time.sleep(2)
def calculate_reward(prev_distance, curr_distance):
    """Exercise stub: score the last action from two distance readings.

    Args:
        prev_distance: Distance reading taken before the action (main()
            passes the ultrasonic sensor's centimeter reading).
        curr_distance: Distance reading taken after the action.

    Returns:
        Once implemented, a number that main() adds to the running reward
        total of the action that was just performed.

    Raises:
        NotImplementedError: Always, until the exercise is completed.
    """
    #
    # add your code here. <<<<<<<<<==========
    #
    # Fail with an explicit message rather than a NameError on an undefined
    # placeholder name.
    raise NotImplementedError(
        'calculate_reward() is an exercise placeholder: return a numeric '
        'reward computed from prev_distance and curr_distance'
    )
def calculate_action_values(rewards, action_counts):
    """Return the average reward earned by each action so far.

    Args:
        rewards: Accumulated reward per action.
        action_counts: Number of times each action was taken; paired with
            ``rewards`` by position, so both lists are expected to have the
            same length.

    Returns:
        A new list with ``rewards[i] / action_counts[i]`` for every action,
        or 0 for an action that has never been taken.
    """
    return [
        0 if count == 0 else total / count
        for total, count in zip(rewards, action_counts)
    ]
def main():
    """Run an endless epsilon-greedy loop that learns which move pays off.

    After a start-up beep, each cycle picks one of the five actions,
    performs it, reads the ultrasonic distance, turns the change in
    distance into a reward via calculate_reward() and adds it to that
    action's running totals. With probability ``epsilon`` the action is
    random (explore); otherwise the action with the highest average reward
    so far is taken (exploit).

    The loop never returns on its own. The motors are switched off when it
    is left through an exception such as KeyboardInterrupt.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    ultrasonic = UltrasonicSensor(INPUT_2)
    sound = Sound()
    sound.beep()

    # Action index -> callable that performs the action.
    perform = {
        DO_NOTHING: do_nothing,
        STEP_BACKWARD: lambda: step_backward(tank_drive),
        STEP_FORWARD: lambda: step_forward(tank_drive),
        STEP_RIGHT: lambda: step_right(tank_drive),
        STEP_LEFT: lambda: step_left(tank_drive),
    }

    cycle = 0
    # One slot per action, in the order: NC, B, F, R, L.
    rewards = [0, 0, 0, 0, 0]
    action_counts = [0, 0, 0, 0, 0]
    # Fraction of cycles spent exploring (trying a random action to learn
    # about the environment) instead of exploiting the best known action.
    epsilon = 0.05
    prev_distance = ultrasonic.distance_centimeters

    try:
        while True:
            cycle += 1
            print('-----[ ' + str(cycle) + ' ]-----')

            # Draw a number in [0, 1) to choose between explore and exploit.
            rand = random.random()
            print('random: ' + str(rand))
            if rand < epsilon:
                # Explore: any action index, both bounds inclusive.
                action = random.randint(0, len(rewards) - 1)
            else:
                # Exploit: index() returns the first maximum, so ties go to
                # the lowest action index.
                action_values = calculate_action_values(rewards, action_counts)
                max_ac_value = max(action_values)
                print('max action value: ' + str(max_ac_value))
                action = action_values.index(max_ac_value)
            print('action selected index: ' + str(action))

            perform[action]()
            print('Action: ' + str(action))

            curr_distance = ultrasonic.distance_centimeters
            print('Distance: ' + str(curr_distance))

            reward = calculate_reward(prev_distance, curr_distance)
            rewards[action] += reward
            action_counts[action] += 1
            prev_distance = curr_distance

            print('NC, B, F, R, L')
            print('rewards:')
            print(*rewards)
            print('action counts:')
            print(*action_counts)
            time.sleep(0.5)
    finally:
        # Make sure an interrupted step does not leave the motors running.
        tank_drive.off()
# Start the learning loop only when the file is executed as a script.
if __name__ == '__main__':
    print('starting !')
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment