Skip to content

Instantly share code, notes, and snippets.

@skeeet
Forked from AndreVallestero/gaze_tracker.py
Created June 12, 2024 09:09
Show Gist options
  • Save skeeet/659b59d8b5c4fb94e7b21b1499cc9145 to your computer and use it in GitHub Desktop.
Save skeeet/659b59d8b5c4fb94e7b21b1499cc9145 to your computer and use it in GitHub Desktop.
Fast, real-time gaze tracker using mediapipe and python (faster than dlib or openface)
# https://youtu.be/DNKAvDeqH_Y
# https://google.github.io/mediapipe/solutions/iris.html#ml-pipeline
# https://google.github.io/mediapipe/solutions/face_mesh.html#python-solution-api
import time

import cv2 as cv
import numpy as np
from mediapipe import solutions
# Real-time iris / eye-contour overlay on webcam frames.
#
# Each frame is run through MediaPipe FaceMesh; the iris landmarks are fitted
# with a minimum enclosing circle (centre + radius) and drawn together with the
# eye contours. Press "q" in the preview window to quit.

# Iris landmark indices: the first endpoint of every connection in the
# mediapipe iris connection sets.
LEFT_IRIS = [pair[0] for pair in solutions.face_mesh.FACEMESH_LEFT_IRIS]
RIGHT_IRIS = [pair[0] for pair in solutions.face_mesh.FACEMESH_RIGHT_IRIS]
# solutions.face_mesh.FACEMESH_RIGHT_EYE has too many extra points, so we use our own list
LEFT_EYE = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
RIGHT_EYE = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]

# Colours are in BGR channel order (frames are converted BGR -> RGB only for mediapipe).
RED = (0, 0, 255)
GREEN = (0, 255, 0)
BLUE = (255, 0, 0)

# A failed read is retried, but only this many times in a row, with a short
# pause in between; otherwise a disconnected camera would spin the loop at
# 100% CPU forever with no way to quit from the keyboard.
MAX_FAILED_READS = 50
FAILED_READ_DELAY_S = 0.02

cam = cv.VideoCapture(0)
try:
    with solutions.face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        failed_reads = 0
        while cam.isOpened():
            # Original author's note: read takes ~11ms and the camera only
            # delivers 24fps, so reading every third frame would reduce load.
            success, frame = cam.read()
            if not success:
                failed_reads += 1
                if failed_reads >= MAX_FAILED_READS:
                    break
                time.sleep(FAILED_READ_DELAY_S)
                continue
            failed_reads = 0

            img_h, img_w = frame.shape[:2]

            # get landmark points (original author's note: ~6ms)
            results = face_mesh.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
            if results.multi_face_landmarks:
                landmarks = results.multi_face_landmarks[0].landmark
                # Scale the landmark x/y by the frame size to get pixel
                # coordinates. The dtype must be int32:
                # cv.minEnclosingCircle rejects int64, which is what a plain
                # `astype(int)` produces on most platforms.
                mesh_points = (
                    np.array([(p.x, p.y) for p in landmarks]) * (img_w, img_h)
                ).astype(np.int32)

                left_iris_pts = mesh_points[LEFT_IRIS]
                right_iris_pts = mesh_points[RIGHT_IRIS]

                # find center of iris and iris radius
                center_left, l_radius = cv.minEnclosingCircle(left_iris_pts)
                center_right, r_radius = cv.minEnclosingCircle(right_iris_pts)

                # render; points are passed as plain int tuples
                center_left = (int(center_left[0]), int(center_left[1]))
                center_right = (int(center_right[0]), int(center_right[1]))
                cv.circle(frame, center_left, int(l_radius), RED, 1, cv.LINE_AA)
                cv.circle(frame, center_right, int(r_radius), RED, 1, cv.LINE_AA)

                # A radius-0 circle marks a single point: iris landmarks, then centres.
                for point in np.concatenate((left_iris_pts, right_iris_pts)).tolist():
                    cv.circle(frame, tuple(point), 0, GREEN)
                cv.circle(frame, center_left, 0, GREEN)
                cv.circle(frame, center_right, 0, GREEN)

                # Closed eye contours; mesh_points is already int32 as polylines requires.
                cv.polylines(frame, [mesh_points[LEFT_EYE]], True, BLUE, 1, cv.LINE_AA)
                cv.polylines(frame, [mesh_points[RIGHT_EYE]], True, BLUE, 1, cv.LINE_AA)

            cv.imshow('img', frame)
            # Mask to the low byte: some HighGUI backends set high bits in the
            # key code, which would make a direct comparison never match.
            key = cv.waitKey(1) & 0xFF
            if key == ord("q"):
                break
finally:
    # Always free the camera and close the preview window, even on an
    # exception or Ctrl-C.
    cam.release()
    cv.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment