Last active
March 11, 2024 02:26
-
-
Save kirisakow/325a557d89262e8d6a4f2918917e82b4 to your computer and use it in GitHub Desktop.
Real-time object detection in webcam video stream in Google Colab, using Ultralytics YOLOv8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [ | |
{ | |
"file_id": "https://gist.github.com/kirisakow/325a557d89262e8d6a4f2918917e82b4", | |
"timestamp": 1709989009226 | |
} | |
], | |
"private_outputs": true, | |
"collapsed_sections": ["r094Dy18OMsg", "RP0iY45PtSFW", "rex6xev5qP-G", "pUYfZKNlhSFn"] | |
}, | |
"kernelspec": { "name": "python3", "display_name": "Python 3" } | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kirisakow/325a557d89262e8d6a4f2918917e82b4/real-time-object-detection-in-webcam-video-stream-using-ultralytics-yolov8.ipynb\" target=\"_blank\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Real-time object detection in webcam video stream using Ultralytics YOLOv8\n", | |
"\n", | |
"This is a refactored version of an older, YOLOv3-compatible [notebook][older_notebook] which no longer works.\n", | |
"\n", | |
"Instructions:\n", | |
"\n", | |
"* Select `Runtime` > `Run all` (or press <kbd>Ctrl</kbd>+<kbd>F9</kbd>) to run all cells.\n", | |
"* Allow access to the webcam, if asked.\n", | |
"* To stop the webcam capture, click the red text or the picture.\n", | |
"\n", | |
"Resources:\n", | |
"* Ultralytics [documentation][ultralytics_doc] for object detection.\n", | |
"* Google Colab [documentation][ipython_display_doc] for executing JavaScript from Python for tasks such as webcam capture, etc.\n", | |
"\n", | |
"Author: [Kiril Isakov][kisakov_linkedin] ([kirisakow][kirisakow_github])\n", | |
"\n", | |
"[kisakov_linkedin]: https://www.linkedin.com/in/kisakov/\n", | |
"[kirisakow_github]: https://github.com/kirisakow\n", | |
"[older_notebook]: https://github.com/vindruid/yolov3-in-colab/blob/aaac930911e18826c560d3156220cedb8726b8c7/yolov3_streaming_webcam.ipynb\n", | |
"[ipython_display_doc]: https://colab.research.google.com/notebooks/snippets/advanced_outputs.ipynb#scrollTo=2viqYx97hPMi\n", | |
"[ultralytics_doc]: https://docs.ultralytics.com/tasks/detect/" | |
], | |
"metadata": { "id": "VPMjpFmrhUAc" } | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": ["## 0. Prerequisites"], | |
"metadata": { "id": "r094Dy18OMsg" } | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": ["### 0.1. Install and initialize libraries and constants"], | |
"metadata": { "id": "RP0iY45PtSFW" } | |
}, | |
{ | |
"cell_type": "code", | |
"source": ["! pip install --upgrade --quiet ultralytics"], | |
"metadata": { "id": "--kKX4f9WlAj" }, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from base64 import b64decode, b64encode\n", | |
"from google.colab.output import eval_js\n", | |
"from IPython.display import display, Javascript\n", | |
"from PIL import Image\n", | |
"from ultralytics import YOLO\n", | |
"from ultralytics.engine.results import Results\n", | |
"import io\n", | |
"import numpy as np\n", | |
"\n", | |
"# File names of the pre-trained YOLOv8 checkpoints that can be loaded below.\n", | |
"MODEL_NAMES = ['yolov8n.pt', 'yolov8s.pt', 'yolov8m.pt', 'yolov8l.pt', 'yolov8x.pt']\n", | |
"# Index 0 selects 'yolov8n.pt'; change the index to load another entry of MODEL_NAMES.\n", | |
"PRE_TRAINED_MODEL = YOLO(MODEL_NAMES[0])\n", | |
"# [width, height] of the capture canvas; interpolated into the JavaScript built by start_stream().\n", | |
"IMG_SHAPE = [640, 480]\n", | |
"# Quality argument given to canvas.toDataURL('image/jpeg', ...) when a frame is encoded.\n", | |
"IMG_QUALITY = 0.8" | |
], | |
"metadata": { "id": "XAgO_ab-roQ2" }, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { "id": "rex6xev5qP-G" }, | |
"source": [ | |
"### 0.2. Define the JavaScript that captures the webcam stream, and the Python functions that call it" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { "id": "7OYmjeF-edKE" }, | |
"source": [ | |
"def start_stream():\n", | |
" js = Javascript(f'''\n", | |
" const IMG_SHAPE = {IMG_SHAPE};\n", | |
" const IMG_QUALITY = {IMG_QUALITY};\n", | |
" ''' + '''\n", | |
" var video;\n", | |
" var div = null;\n", | |
" var stream;\n", | |
" var captureCanvas;\n", | |
" var imgElement;\n", | |
" var labelElement;\n", | |
"\n", | |
" var pendingResolve = null;\n", | |
" var shutdown = false;\n", | |
"\n", | |
" function removeDom() {\n", | |
" stream.getVideoTracks()[0].stop();\n", | |
" video.remove();\n", | |
" div.remove();\n", | |
" video = null;\n", | |
" div = null;\n", | |
" stream = null;\n", | |
" imgElement = null;\n", | |
" captureCanvas = null;\n", | |
" labelElement = null;\n", | |
" }\n", | |
"\n", | |
" function onAnimationFrame() {\n", | |
" if (!shutdown) {\n", | |
" window.requestAnimationFrame(onAnimationFrame);\n", | |
" }\n", | |
" if (pendingResolve) {\n", | |
" var result = \"\";\n", | |
" if (!shutdown) {\n", | |
" captureCanvas.getContext('2d').drawImage(video, 0, 0, IMG_SHAPE[0], IMG_SHAPE[1]);\n", | |
" result = captureCanvas.toDataURL('image/jpeg', IMG_QUALITY)\n", | |
" }\n", | |
" var lp = pendingResolve;\n", | |
" pendingResolve = null;\n", | |
" lp(result);\n", | |
" }\n", | |
" }\n", | |
"\n", | |
" async function createDom() {\n", | |
" if (div !== null) {\n", | |
" return stream;\n", | |
" }\n", | |
"\n", | |
" div = document.createElement('div');\n", | |
" div.style.border = '2px solid black';\n", | |
" div.style.padding = '3px';\n", | |
" div.style.width = '100%';\n", | |
" div.style.maxWidth = '600px';\n", | |
" document.body.appendChild(div);\n", | |
"\n", | |
" const modelOut = document.createElement('div');\n", | |
" modelOut.innerHTML = \"<span>Status: </span>\";\n", | |
" labelElement = document.createElement('span');\n", | |
" labelElement.innerText = 'No data';\n", | |
" labelElement.style.fontWeight = 'bold';\n", | |
" modelOut.appendChild(labelElement);\n", | |
" div.appendChild(modelOut);\n", | |
"\n", | |
" video = document.createElement('video');\n", | |
" video.style.display = 'block';\n", | |
" video.width = div.clientWidth - 6;\n", | |
" video.setAttribute('playsinline', '');\n", | |
" video.onclick = () => { shutdown = true; };\n", | |
" stream = await navigator.mediaDevices.getUserMedia(\n", | |
" {video: { facingMode: \"environment\"}});\n", | |
" div.appendChild(video);\n", | |
"\n", | |
" imgElement = document.createElement('img');\n", | |
" imgElement.style.position = 'absolute';\n", | |
" imgElement.style.zIndex = 1;\n", | |
" imgElement.onclick = () => { shutdown = true; };\n", | |
" div.appendChild(imgElement);\n", | |
"\n", | |
" const instruction = document.createElement('div');\n", | |
" instruction.innerHTML =\n", | |
" '<span style=\"color: red; font-weight: bold;\">' +\n", | |
" 'When finished, click here or on the video to stop this demo</span>';\n", | |
" div.appendChild(instruction);\n", | |
" instruction.onclick = () => { shutdown = true; };\n", | |
"\n", | |
" video.srcObject = stream;\n", | |
" await video.play();\n", | |
"\n", | |
" captureCanvas = document.createElement('canvas');\n", | |
" captureCanvas.width = IMG_SHAPE[0]; //video.videoWidth;\n", | |
" captureCanvas.height = IMG_SHAPE[1]; //video.videoHeight;\n", | |
" window.requestAnimationFrame(onAnimationFrame);\n", | |
"\n", | |
" return stream;\n", | |
" }\n", | |
" async function takePhoto(label, imgData) {\n", | |
" if (shutdown) {\n", | |
" removeDom();\n", | |
" shutdown = false;\n", | |
" return '';\n", | |
" }\n", | |
"\n", | |
" var preCreate = Date.now();\n", | |
" stream = await createDom();\n", | |
"\n", | |
" var preShow = Date.now();\n", | |
" if (label != \"\") {\n", | |
" labelElement.innerHTML = label;\n", | |
" }\n", | |
"\n", | |
" if (imgData != \"\") {\n", | |
" var videoRect = video.getClientRects()[0];\n", | |
" imgElement.style.top = videoRect.top + \"px\";\n", | |
" imgElement.style.left = videoRect.left + \"px\";\n", | |
" imgElement.style.width = videoRect.width + \"px\";\n", | |
" imgElement.style.height = videoRect.height + \"px\";\n", | |
" imgElement.src = imgData;\n", | |
" }\n", | |
"\n", | |
" var preCapture = Date.now();\n", | |
" var result = await new Promise((resolve, reject) => pendingResolve = resolve);\n", | |
" shutdown = false;\n", | |
"\n", | |
" return {\n", | |
" 'create': preShow - preCreate,\n", | |
" 'show': preCapture - preShow,\n", | |
" 'capture': Date.now() - preCapture,\n", | |
" 'img': result,\n", | |
" };\n", | |
" }\n", | |
" ''')\n", | |
" display(js)\n", | |
"\n", | |
"def take_photo(label, img_data):\n", | |
" data = eval_js(f'takePhoto(\"{label}\", \"{img_data}\")')\n", | |
" return data" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": ["### 0.3. Define Python functions to decode captured frames and render detection annotations"], | |
"metadata": { "id": "pUYfZKNlhSFn" } | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { "id": "A0ZgJiQBskDt" }, | |
"source": [ | |
"def js_response_to_image(js_response) -> Image.Image:\n", | |
" _, b64_str = js_response['img'].split(',')\n", | |
" jpeg_bytes = b64decode(b64_str)\n", | |
" image = Image.open(io.BytesIO(jpeg_bytes))\n", | |
" return image\n", | |
"\n", | |
"def turn_non_black_pixels_visible(rgba_compatible_array: np.ndarray) -> np.ndarray:\n", | |
" rgba_compatible_array[:, :, 3] = (rgba_compatible_array.max(axis=2) > 0).astype(int) * 255\n", | |
" return rgba_compatible_array\n", | |
"\n", | |
"def black_transparent_rgba_canvas(w, h) -> np.ndarray:\n", | |
" return np.zeros([w, h, 4], dtype=np.uint8)\n", | |
"\n", | |
"def draw_annotations_on_transparent_bg(detection_result: Results) -> Image.Image:\n", | |
" black_rgba_canvas = black_transparent_rgba_canvas(*detection_result.orig_shape)\n", | |
" transparent_canvas_with_boxes_invisible = detection_result.plot(font='verdana', masks=False, img=black_rgba_canvas)\n", | |
" transparent_canvas_with_boxes_visible = turn_non_black_pixels_visible(transparent_canvas_with_boxes_invisible)\n", | |
" image = Image.fromarray(transparent_canvas_with_boxes_visible, 'RGBA')\n", | |
" return image" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { "id": "iIlKEQ3ruDCw" }, | |
"source": ["## 1. Perform real-time object detection in webcam stream"] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { "id": "hZi6NXSDyAY6" }, | |
"source": [ | |
"start_stream()\n", | |
"img_data = ''\n", | |
"while True:\n", | |
" js_response = take_photo('Capturing...', img_data)\n", | |
" if not js_response:\n", | |
" break\n", | |
" captured_img = js_response_to_image(js_response)\n", | |
" for detection_result in PRE_TRAINED_MODEL(source=np.array(captured_img), verbose=False):\n", | |
" annotations_img = draw_annotations_on_transparent_bg(detection_result)\n", | |
" with io.BytesIO() as buffer:\n", | |
" annotations_img.save(buffer, format='png')\n", | |
" img_as_base64_str = str(b64encode(buffer.getvalue()), 'utf-8')\n", | |
" img_data = f'data:image/png;base64,{img_as_base64_str}'" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment