Skip to content

Instantly share code, notes, and snippets.

@israelio
Created December 8, 2022 12:59
Show Gist options
  • Save israelio/98f27bddddec27bc928aa5467149a3c2 to your computer and use it in GitHub Desktop.
Save israelio/98f27bddddec27bc928aa5467149a3c2 to your computer and use it in GitHub Desktop.
Reinforcement learning ev3 - final
import time
import random
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4
from ev3dev2.sensor.lego import UltrasonicSensor
from ev3dev2.sound import Sound
# Motor speed used for every move; wrapped in SpeedPercent by the step_* helpers.
SPEED = 20
# Duration argument passed to on_for_seconds for each drive step.
DRIVE_TIME = 1
# Action identifiers. Each value is also the index of that action in the
# rewards / action_counts lists kept by main().
DO_NOTHING = 0
STEP_BACKWARD = 1
STEP_FORWARD = 2
STEP_RIGHT = 3
STEP_LEFT = 4
def step_forward(tank):
    """Run one forward drive step.

    Both speed arguments are +SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_backward(tank):
    """Run one backward drive step.

    Both speed arguments are -SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_left(tank):
    """Run one left-turn step.

    The first speed argument is -SPEED percent and the second is +SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_right(tank):
    """Run one right-turn step.

    The first speed argument is +SPEED percent and the second is -SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def do_nothing():
    """Perform the 'no action' step by idling for two seconds."""
    idle_seconds = 2
    time.sleep(idle_seconds)
def calculate_reward(prev_distance, curr_distance,
                     min_distance=70, max_distance=220):
    """Score the last action from the change in the distance reading.

    Args:
        prev_distance: Reading taken before the action (main() passes the
            ultrasonic sensor value in centimetres).
        curr_distance: Reading taken after the action, same unit.
        min_distance: Readings at or below this value get the heavy penalty.
        max_distance: Readings above this value get the heavy penalty.

    Returns:
        -50 when curr_distance is <= min_distance or > max_distance,
        1 when the reading decreased compared to prev_distance,
        -5 otherwise (reading unchanged or increased).
    """
    # Outside the allowed band: strongly discourage whatever led here.
    if curr_distance <= min_distance or curr_distance > max_distance:
        return -50
    # Reading went down: small positive reward.
    if curr_distance < prev_distance:
        return 1
    # Unchanged or larger reading: both cases carry the same mild penalty.
    return -5
def calculate_action_values(rewards, action_counts):
    """Return the average reward per action.

    Args:
        rewards: Accumulated reward for each action.
        action_counts: Number of times each action was taken, in the same
            order as ``rewards``.

    Returns:
        A new list where entry i is rewards[i] / action_counts[i], or 0 for
        an action that has never been taken (count of 0). The lists are
        paired element by element; if their lengths differ, pairing stops at
        the shorter one.
    """
    return [
        0 if count == 0 else total / count
        for total, count in zip(rewards, action_counts)
    ]
def main():
    """Run the epsilon-greedy learning loop on the robot.

    Each cycle picks an action (random with probability ``epsilon``,
    otherwise the one with the highest average reward so far), performs it,
    reads the ultrasonic distance, and adds the resulting reward to that
    action's running total. The loop has no exit condition.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    ultrasonic = UltrasonicSensor(INPUT_2)
    sound = Sound()
    # sound.beep()

    # Action id -> zero-argument callable that performs the move.
    moves = {
        DO_NOTHING: do_nothing,
        STEP_BACKWARD: lambda: step_backward(tank_drive),
        STEP_FORWARD: lambda: step_forward(tank_drive),
        STEP_RIGHT: lambda: step_right(tank_drive),
        STEP_LEFT: lambda: step_left(tank_drive),
    }

    # Per-action totals, indexed in the order: NC, B, F, R, L.
    rewards = [0] * 5
    action_counts = [0] * 5

    # Probability of exploring (trying a random action to learn about the
    # environment) instead of exploiting the best-known action.
    epsilon = 0.05

    prev_distance = ultrasonic.distance_centimeters
    cycle = 0
    while True:
        cycle += 1
        print('-----[ ' + str(cycle) +' ]-----')

        # Draw a number in [0, 1) to decide between explore and exploit.
        roll = random.random()
        print('random: '+str(roll))
        if roll >= epsilon:
            # Exploit: take the action with the highest average reward
            # (ties go to the lowest index, as list.index returns the first).
            action_values = calculate_action_values(rewards, action_counts)
            best_value = max(action_values)
            print('max action value: '+str(best_value))
            action = action_values.index(best_value)
            print('action selected index: '+str(action))
        else:
            # Explore: randint is inclusive, so any of the five actions 0..4.
            action = random.randint(0, 4)

        move = moves.get(action)
        if move is not None:
            move()
        print('Action: ' + str(action))

        curr_distance = ultrasonic.distance_centimeters
        print('Distance: ' + str(curr_distance))

        # Credit the outcome to the action just taken.
        reward = calculate_reward(prev_distance, curr_distance)
        rewards[action] += reward
        action_counts[action] += 1
        prev_distance = curr_distance

        print('NC, B, F, R, L')
        print('rewards:')
        print(*rewards)
        print('action counts:')
        print(*action_counts)
        time.sleep(0.5)
# Script entry point: announce start-up, then hand control to main().
if __name__ == '__main__':
    print('starting !')
    main()
import time
import random
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4
from ev3dev2.sensor.lego import UltrasonicSensor
from ev3dev2.sound import Sound
# Motor speed used for every move; wrapped in SpeedPercent by the step_* helpers.
SPEED = 20
# Duration argument passed to on_for_seconds for each drive step.
DRIVE_TIME = 1
def step_forward(tank):
    """Run one forward drive step.

    Both speed arguments are +SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_backward(tank):
    """Run one backward drive step.

    Both speed arguments are -SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_left(tank):
    """Run one left-turn step.

    The first speed argument is -SPEED percent and the second is +SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_right(tank):
    """Run one right-turn step.

    The first speed argument is +SPEED percent and the second is -SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def do_nothing():
    """Perform the 'no action' step by idling for two seconds."""
    idle_seconds = 2
    time.sleep(idle_seconds)
def main():
    """Set up the tank drive and speaker, then sound a single beep.

    The individual move calls are left commented out, presumably so they
    can be enabled by hand to try each move.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    speaker = Sound()
    speaker.beep()
    # do_nothing()
    # step_backward(tank_drive)
    # step_forward(tank_drive)
    # step_right(tank_drive)
    # step_left(tank_drive)
# Script entry point: announce start-up, then hand control to main().
if __name__ == '__main__':
    print('starting !')
    main()
import time
import random
from ev3dev2.motor import LargeMotor, OUTPUT_A, OUTPUT_B, MoveTank, SpeedPercent
from ev3dev2.sensor import INPUT_1, INPUT_2, INPUT_3, INPUT_4
from ev3dev2.sensor.lego import UltrasonicSensor
from ev3dev2.sound import Sound
# Motor speed used for every move; wrapped in SpeedPercent by the step_* helpers.
SPEED = 20
# Duration argument passed to on_for_seconds for each drive step.
DRIVE_TIME = 1
# Action identifiers. Each value is also the index of that action in the
# rewards / action_counts lists kept by main().
DO_NOTHING = 0
STEP_BACKWARD = 1
STEP_FORWARD = 2
STEP_RIGHT = 3
STEP_LEFT = 4
def step_forward(tank):
    """Run one forward drive step.

    Both speed arguments are +SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_backward(tank):
    """Run one backward drive step.

    Both speed arguments are -SPEED percent and the move lasts DRIVE_TIME
    seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_left(tank):
    """Run one left-turn step.

    The first speed argument is -SPEED percent and the second is +SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(-SPEED)
    second_speed = SpeedPercent(SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def step_right(tank):
    """Run one right-turn step.

    The first speed argument is +SPEED percent and the second is -SPEED
    percent, so the two sides counter-rotate for DRIVE_TIME seconds.
    """
    first_speed = SpeedPercent(SPEED)
    second_speed = SpeedPercent(-SPEED)
    tank.on_for_seconds(first_speed, second_speed, DRIVE_TIME)
def do_nothing():
    """Perform the 'no action' step by idling for two seconds."""
    idle_seconds = 2
    time.sleep(idle_seconds)
def calculate_reward(prev_distance, curr_distance):
    """Return the reward for the last action (exercise placeholder).

    Args:
        prev_distance: Distance reading taken before the action.
        curr_distance: Distance reading taken after the action.

    Returns:
        A number that main() adds to the running reward total of the action
        just taken.

    Raises:
        NotImplementedError: Always, until the exercise below is completed.
    """
    #
    # add your code here.            <<<<<<<<<==========
    #
    # Fail with a clear message instead of a NameError on an undefined name.
    raise NotImplementedError('calculate_reward is not implemented yet')
def calculate_action_values(rewards, action_counts):
    """Return the average reward per action.

    Args:
        rewards: Accumulated reward for each action.
        action_counts: Number of times each action was taken, in the same
            order as ``rewards``.

    Returns:
        A new list where entry i is rewards[i] / action_counts[i], or 0 for
        an action that has never been taken (count of 0). The lists are
        paired element by element; if their lengths differ, pairing stops at
        the shorter one.
    """
    return [
        0 if count == 0 else total / count
        for total, count in zip(rewards, action_counts)
    ]
def main():
    """Run the epsilon-greedy learning loop on the robot.

    After a start-up beep, each cycle picks an action (random with
    probability ``epsilon``, otherwise the one with the highest average
    reward so far), performs it, reads the ultrasonic distance, and adds the
    resulting reward to that action's running total. The loop has no exit
    condition.
    """
    tank_drive = MoveTank(OUTPUT_A, OUTPUT_B)
    ultrasonic = UltrasonicSensor(INPUT_2)
    sound = Sound()
    sound.beep()

    # Action id -> zero-argument callable that performs the move.
    moves = {
        DO_NOTHING: do_nothing,
        STEP_BACKWARD: lambda: step_backward(tank_drive),
        STEP_FORWARD: lambda: step_forward(tank_drive),
        STEP_RIGHT: lambda: step_right(tank_drive),
        STEP_LEFT: lambda: step_left(tank_drive),
    }

    # Per-action totals, indexed in the order: NC, B, F, R, L.
    rewards = [0] * 5
    action_counts = [0] * 5

    # Probability of exploring (trying a random action to learn about the
    # environment) instead of exploiting the best-known action.
    epsilon = 0.05

    prev_distance = ultrasonic.distance_centimeters
    cycle = 0
    while True:
        cycle += 1
        print('-----[ ' + str(cycle) +' ]-----')

        # Draw a number in [0, 1) to decide between explore and exploit.
        roll = random.random()
        print('random: '+str(roll))
        if roll >= epsilon:
            # Exploit: take the action with the highest average reward
            # (ties go to the lowest index, as list.index returns the first).
            action_values = calculate_action_values(rewards, action_counts)
            best_value = max(action_values)
            print('max action value: '+str(best_value))
            action = action_values.index(best_value)
            print('action selected index: '+str(action))
        else:
            # Explore: randint is inclusive, so any of the five actions 0..4.
            action = random.randint(0, 4)

        move = moves.get(action)
        if move is not None:
            move()
        print('Action: ' + str(action))

        curr_distance = ultrasonic.distance_centimeters
        print('Distance: ' + str(curr_distance))

        # Credit the outcome to the action just taken.
        reward = calculate_reward(prev_distance, curr_distance)
        rewards[action] += reward
        action_counts[action] += 1
        prev_distance = curr_distance

        print('NC, B, F, R, L')
        print('rewards:')
        print(*rewards)
        print('action counts:')
        print(*action_counts)
        time.sleep(0.5)
# Script entry point: announce start-up, then hand control to main().
if __name__ == '__main__':
    print('starting !')
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment