@maxious
Created September 21, 2019 11:05
# Raspberry Pi object-recognition demo: run MobileNetV2 over a Pi Camera
# stream, show each result on a Pimoroni Display-o-Tron HAT, and speak
# persistent detections aloud with festival.
import time
import logging
import argparse
import os

import dothat.backlight as backlight
import dothat.lcd as lcd
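
# dothat is Pimoroni's driver for the Display-o-Tron HAT: a 16x3 character
# LCD with a six-zone RGB backlight and an LED bargraph, which is what the
# backlight.* and lcd.* calls below drive.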
# Reset the LED states and polarity
backlight.graph_off()
# Dim the LEDs by setting the max duty to 1
backlight.graph_set_led_duty(0, 1)
print("Button pressed!")
lcd.clear()
backlight.rgb(255, 255, 255)
CONFIDENCE_THRESHOLD = 0.5    # minimum confidence before we count a detection
PERSISTENCE_THRESHOLD = 0.25  # fraction of the recent window a label must fill before it is spoken
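# Worked example: with the 10-frame window used below, 0.25 means a label
# must top at least 3 of the last 10 frames (3/10 = 0.3 > 0.25, while
# 2/10 = 0.2 is not) before it is announced.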
print("starting demo")
lcd.set_cursor_position(0, 0)
lcd.write("starting demo")
lcd.set_cursor_position(0, 1)
lcd.write("starting demo")
lcd.set_cursor_position(0, 2)
lcd.write("starting demo")
# App
from rpi_vision.agent.capture import PiCameraStream
from rpi_vision.models.mobilenet_v2 import MobileNetV2Base
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)
capture_manager = PiCameraStream(resolution=(640,480), rotation=0, preview=False)
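# Per rpi-vision's capture module, PiCameraStream reads 640x480 frames from
# the Pi camera on a background thread once start() is called; read() then
# returns the most recent frame as a numpy array.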
def parse_args():
    parser = argparse.ArgumentParser()
    # Note: argparse's type=bool treats any non-empty string as True, so
    # '--include-top False' still yields True; omit the flag to keep the default.
    parser.add_argument('--include-top', type=bool,
                        dest='include_top', default=True,
                        help='Include fully-connected layer at the top of the network.')
    parser.add_argument('--tflite',
                        dest='tflite', action='store_true', default=False,
                        help='Convert base model to TFLite FlatBuffer, then load model into TFLite Python Interpreter')
    args = parser.parse_args()
    return args
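
# Example invocations (the filename is hypothetical; use whatever this gist
# is saved as):
#   python3 rpi_vision_demo.py            # full TensorFlow model
#   python3 rpi_vision_demo.py --tflite   # TFLite interpreter instead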
last_seen = [None] * 10  # rolling window of recent top labels
last_spoken = None       # last label announced, so we do not repeat it while in view
def main(args):
    global last_spoken
    model = MobileNetV2Base(include_top=args.include_top)
    capture_manager.start()
    print("demo started")
    lcd.set_cursor_position(0, 0)
    lcd.write("demo started")
    lcd.set_cursor_position(0, 1)
    lcd.write("demo started")
    lcd.set_cursor_position(0, 2)
    lcd.write("demo started")
    while not capture_manager.stopped:
        if capture_manager.frame is None:
            continue
        frame = capture_manager.read()
        timestamp = time.monotonic()
        if args.tflite:
            prediction = model.tflite_predict(frame)[0]
        else:
            prediction = model.predict(frame)[0]
        logging.info(prediction)
        delta = time.monotonic() - timestamp
        logging.info("%s inference took %d ms, %0.1f FPS" % ("TFLite" if args.tflite else "TF", delta * 1000, 1 / delta))
        lcd.set_cursor_position(0, 0)
        lcd.write("%d ms, %0.1f FPS" % (delta * 1000, 1 / delta))
        print(last_seen)
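        # The prediction list holds ImageNet-style (class_id, label_name,
        # confidence) tuples; the loop below unpacks each candidate and keeps
        # the first confident one.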
        for p in prediction:
            label, name, conf = p
            if conf > CONFIDENCE_THRESHOLD:
                print("Detected", name)
                lcd.clear()
                lcd.set_cursor_position(0, 1)
                lcd.write(name)
                persistent_obj = False  # assume the object is not persistent
                last_seen.append(name)
                last_seen.pop(0)
                inferred_times = last_seen.count(name)
                lcd.set_cursor_position(0, 2)
                lcd.write("%d / %d = %0.1f" % (inferred_times, len(last_seen), inferred_times / len(last_seen)))
                if inferred_times / len(last_seen) > PERSISTENCE_THRESHOLD:
                    persistent_obj = True
                detecttext = name.replace("_", " ")
                if persistent_obj and last_spoken != detecttext:
                    # Speak the label via the festival TTS engine, backgrounded
                    # so synthesis does not block the inference loop.
                    os.system('echo %s | festival --tts &' % detecttext)
                    last_spoken = detecttext
                break
            else:
                # No confident detection for this candidate: age the window
                # with None; once the whole window is empty, clear last_spoken
                # so a returning object is announced again.
                last_seen.append(None)
                last_seen.pop(0)
                if last_seen.count(None) == len(last_seen):
                    last_spoken = None
if __name__ == "__main__":
args = parse_args()
try:
main(args)
except KeyboardInterrupt:
capture_manager.stop()