# youngsoul/sign_language_deepstack_video.py

## sign_language_deepstack_video.py
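# USAGE
# python sign_language_deepstack_video.py --deepstack-url http://localhost:5000/v1/vision/custom/sign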

import argparse
import imutils
from imutils.video import VideoStream

import time
import cv2
import requests


def predict_sign(frame, url, timeout=10.0):
    """Post one encoded image to a DeepStack endpoint and return the top prediction.

    Args:
        frame: Encoded image bytes, uploaded as the ``image`` form file.
        url: Full URL of the DeepStack custom-model endpoint.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the caller indefinitely.

    Returns:
        The first entry of the response's ``predictions`` list, e.g.
        ``{'confidence': 0.939833, 'label': 'no', 'y_min': 81, 'x_min': 99,
        'y_max': 193, 'x_max': 221}``, or ``None`` when the request fails,
        the reply is not JSON, ``success`` is falsy, or nothing was detected.
    """
    started = time.time()
    try:
        response = requests.post(
            url, files={"image": frame}, timeout=timeout
        ).json()
    except (requests.RequestException, ValueError) as err:
        # Network failures and non-JSON replies are reported rather than
        # raised so a single bad request does not abort the calling loop.
        print(f"Inference request failed: {err}")
        return None
    print(f"Inference took: {(time.time() - started)} seconds")
    print(response)

    if not isinstance(response, dict) or not response.get("success"):
        return None

    # A successful reply may still carry no detections (or omit the key).
    predictions = response.get("predictions") or []
    for item in predictions:
        print(item["label"])

    return predictions[0] if predictions else None


def main():
    """Stream webcam frames to DeepStack and display the top sign prediction.

    Reads frames from camera 0, resizes each to a width of 400 pixels,
    JPEG-encodes it, sends it to ``predict_sign`` and draws the returned
    label, confidence and bounding box on the frame. Press ``q`` in the
    preview window to quit.
    """
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("--deepstack-url", type=str,
                    default="http://localhost:5000/v1/vision/custom/sign",
                    help="url to running deepstack docker image")
    args = vars(ap.parse_args())

    deepstack_url = args['deepstack_url']

    # initialize the video stream and allow the camera sensor to warm up
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    time.sleep(2.0)

    color = (0, 255, 0)

    try:
        # loop over the frames from the video stream
        while True:
            frame = vs.read()
            if frame is None:
                # No frame available (camera missing or stream ended);
                # resizing None would raise, so stop cleanly instead.
                print("[ERROR] no frame received from the video stream")
                break

            # resize the frame to have a maximum width of 400 pixels
            frame = imutils.resize(frame, width=400)
            success, encoded_image = cv2.imencode('.jpg', frame)

            prediction = None
            if success:
                print("Predict....")
                prediction = predict_sign(encoded_image.tobytes(), deepstack_url)
            else:
                print("[WARN] could not JPEG-encode frame; skipping inference")

            if prediction is not None:
                confidence = prediction['confidence']
                label = prediction['label']
                y_min = prediction['y_min']
                x_min = prediction['x_min']
                y_max = prediction['y_max']
                x_max = prediction['x_max']

                # Keep the label inside the frame when the box touches the
                # top edge (y_min - 10 would otherwise be off-screen).
                label_y = max(y_min - 10, 15)

                # display the label and bounding box rectangle on the frame
                cv2.putText(frame, f"{label} {confidence}", (x_min, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)

            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF

            # if the `q` key was pressed, break from the loop
            if key == ord("q"):
                break
    finally:
        # do a bit of cleanup, even if the loop exits through an exception
        cv2.destroyAllWindows()
        vs.stop()


if __name__ == '__main__':
    main()
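	# USAGE
	# python sign_language_deepstack_video.py --deepstack-url http://localhost:5000/v1/vision/custom/sign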

	import argparse
	import imutils
	from imutils.video import VideoStream

	import time
	import cv2
	import requests


	def predict_sign(frame, url, timeout=10.0):
		"""Post one encoded image to a DeepStack endpoint and return the top prediction.

		Args:
			frame: Encoded image bytes, uploaded as the ``image`` form file.
			url: Full URL of the DeepStack custom-model endpoint.
			timeout: Seconds to wait for the server before giving up. Without a
				timeout a stalled server would block the caller indefinitely.

		Returns:
			The first entry of the response's ``predictions`` list, e.g.
			``{'confidence': 0.939833, 'label': 'no', 'y_min': 81, 'x_min': 99,
			'y_max': 193, 'x_max': 221}``, or ``None`` when the request fails,
			the reply is not JSON, ``success`` is falsy, or nothing was detected.
		"""
		started = time.time()
		try:
			response = requests.post(
				url, files={"image": frame}, timeout=timeout
			).json()
		except (requests.RequestException, ValueError) as err:
			# Network failures and non-JSON replies are reported rather than
			# raised so a single bad request does not abort the calling loop.
			print(f"Inference request failed: {err}")
			return None
		print(f"Inference took: {(time.time() - started)} seconds")
		print(response)

		if not isinstance(response, dict) or not response.get("success"):
			return None

		# A successful reply may still carry no detections (or omit the key).
		predictions = response.get("predictions") or []
		for item in predictions:
			print(item["label"])

		return predictions[0] if predictions else None


	def main():
		"""Stream webcam frames to DeepStack and display the top sign prediction.

		Reads frames from camera 0, resizes each to a width of 400 pixels,
		JPEG-encodes it, sends it to ``predict_sign`` and draws the returned
		label, confidence and bounding box on the frame. Press ``q`` in the
		preview window to quit.
		"""
		# construct the argument parser and parse the arguments
		ap = argparse.ArgumentParser()
		ap.add_argument("--deepstack-url", type=str,
			default="http://localhost:5000/v1/vision/custom/sign",
			help="url to running deepstack docker image")
		args = vars(ap.parse_args())

		deepstack_url = args['deepstack_url']

		# initialize the video stream and allow the camera sensor to warm up
		print("[INFO] starting video stream...")
		vs = VideoStream(src=0).start()
		time.sleep(2.0)

		color = (0, 255, 0)

		try:
			# loop over the frames from the video stream
			while True:
				frame = vs.read()
				if frame is None:
					# No frame available (camera missing or stream ended);
					# resizing None would raise, so stop cleanly instead.
					print("[ERROR] no frame received from the video stream")
					break

				# resize the frame to have a maximum width of 400 pixels
				frame = imutils.resize(frame, width=400)
				success, encoded_image = cv2.imencode('.jpg', frame)

				prediction = None
				if success:
					print("Predict....")
					prediction = predict_sign(encoded_image.tobytes(), deepstack_url)
				else:
					print("[WARN] could not JPEG-encode frame; skipping inference")

				if prediction is not None:
					confidence = prediction['confidence']
					label = prediction['label']
					y_min = prediction['y_min']
					x_min = prediction['x_min']
					y_max = prediction['y_max']
					x_max = prediction['x_max']

					# Keep the label inside the frame when the box touches the
					# top edge (y_min - 10 would otherwise be off-screen).
					label_y = max(y_min - 10, 15)

					# display the label and bounding box rectangle on the frame
					cv2.putText(frame, f"{label} {confidence}", (x_min, label_y),
						cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
					cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)

				cv2.imshow("Frame", frame)
				key = cv2.waitKey(1) & 0xFF

				# if the `q` key was pressed, break from the loop
				if key == ord("q"):
					break
		finally:
			# do a bit of cleanup, even if the loop exits through an exception
			cv2.destroyAllWindows()
			vs.stop()


	if __name__ == '__main__':
		main()