Skip to content

Instantly share code, notes, and snippets.

@Mikadun
Created July 19, 2021 11:33
Show Gist options
  • Save Mikadun/7ee616d34415949d09ea7ee53e52fca5 to your computer and use it in GitHub Desktop.
Save Mikadun/7ee616d34415949d09ea7ee53e52fca5 to your computer and use it in GitHub Desktop.
Gesture volume control with Python and OpenCV. An upgraded version of Murtaza's workshop.
import cv2
import mediapipe as mp
import numpy as np
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
# Largest volume change (in scalar units, 0.0-1.0) applied per processed frame.
VOLUME_SPEED = 0.1
# The wrist-based estimate of the maximum pinch distance is rough, so it is
# scaled down by this factor.
MAX_DISTANCE_SCALE = 0.7


def pinch_to_volume_delta(thumb, index, wrist, speed=VOLUME_SPEED,
                          max_distance_scale=MAX_DISTANCE_SCALE):
    """Map the thumb/index pinch distance to a volume change.

    The wrist-to-thumb-tip and wrist-to-index-tip distances are treated as the
    two legs of a right triangle; its hypotenuse (scaled down by
    ``max_distance_scale``) is used as the largest expected distance between
    the two finger tips. The actual tip distance is then mapped linearly from
    ``[0, max distance]`` onto ``[-speed, speed]`` and clamped to that range.

    Args:
        thumb: (x, y) of the thumb tip.
        index: (x, y) of the index finger tip.
        wrist: (x, y) of the wrist.
        speed: Largest absolute volume change to return.
        max_distance_scale: Factor applied to the estimated maximum distance.

    Returns:
        A float in ``[-speed, speed]``; ``0.0`` when the estimated maximum
        distance is zero (all three points coincide).
    """
    thumb = np.asarray(thumb, dtype=float)
    index = np.asarray(index, dtype=float)
    wrist = np.asarray(wrist, dtype=float)

    # Rough length of both fingers, measured from the wrist.
    index_length = np.linalg.norm(index - wrist)
    thumb_length = np.linalg.norm(thumb - wrist)
    max_distance = max_distance_scale * math.hypot(index_length, thumb_length)
    if max_distance <= 0.0:
        # Degenerate geometry: there is no usable range to interpolate over.
        return 0.0

    pinch_distance = np.linalg.norm(index - thumb)
    return float(np.interp(pinch_distance, (0.0, max_distance), (-speed, speed)))


def apply_volume_delta(current_volume, delta):
    """Return ``current_volume + delta`` clamped to the scalar range [0.0, 1.0]."""
    return min(max(current_volume + delta, 0.0), 1.0)


def main():
    """Adjust the master volume from the thumb/index pinch seen by camera 0.

    Every frame in which a hand is detected nudges the master volume up or
    down by at most ``VOLUME_SPEED``. Press Esc in the preview window to quit.
    """
    np.set_printoptions(precision=3)

    # Short names to reference.
    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    landmark_ids = mp_hands.HandLandmark

    # pycaw base code for master volume control.
    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))

    # Take video from the camera with id = 0.
    capture = cv2.VideoCapture(0)
    try:
        with mp_hands.Hands(min_detection_confidence=0.7, max_num_hands=1) as hands:
            while capture.isOpened():
                success, image = capture.read()
                # Empty frames are ignored, but the key check below still runs.
                if success:
                    # MediaPipe is fed an RGB copy; OpenCV delivers BGR.
                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    results = hands.process(image_rgb)

                    # multi_hand_landmarks is None when no hand was found.
                    for hand_landmarks in results.multi_hand_landmarks or ():
                        mp_drawing.draw_landmarks(
                            image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                        lm = hand_landmarks.landmark
                        thumb_tip = lm[landmark_ids.THUMB_TIP]
                        index_tip = lm[landmark_ids.INDEX_FINGER_TIP]
                        wrist = lm[landmark_ids.WRIST]
                        delta = pinch_to_volume_delta(
                            (thumb_tip.x, thumb_tip.y),
                            (index_tip.x, index_tip.y),
                            (wrist.x, wrist.y),
                        )

                        # Read and write the volume as a scalar, not decibels.
                        current_volume = volume.GetMasterVolumeLevelScalar()
                        new_volume = apply_volume_delta(current_volume, delta)
                        volume.SetMasterVolumeLevelScalar(new_volume, None)

                    cv2.imshow('Image', image)

                # Esc (key code 27) quits; checked on every iteration.
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Finalization: runs even if processing raised.
        capture.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment