Skip to content

Instantly share code, notes, and snippets.

@yakushevichsv
Created September 28, 2021 18:35
Show Gist options
  • Save yakushevichsv/942bffda8c4e5be01abe2d0cf0bf19b8 to your computer and use it in GitHub Desktop.
Save yakushevichsv/942bffda8c4e5be01abe2d0cf0bf19b8 to your computer and use it in GitHub Desktop.
Contains code for detecting and tracking a face, plus an approximate blink calculation.
//
// VisionFaceTracker.swift
// Test
//
// Created by Siarhei Yakushevich on 28.09.21.
//
import Foundation
import Vision
// MARK: - VisionFaceTracker
/// Detects a face in a camera feed with Vision, tracks it from frame to
/// frame, and reports a per-face approximate blink flag via a completion
/// block (see `handleFeed(sampleBuffer:orientation:completion:)`).
final class VisionFaceTracker {
// One-shot face-rectangle detection requests, seeded in `prepareRequests()`.
private (set)var detectionRequests: [VNDetectFaceRectanglesRequest]!
// Per-face tracking requests. First populated by the detection request's
// completion handler, then refreshed on every processed frame in
// `handleFeedInner`.
private (set)var trackingRequests: [VNTrackObjectRequest]!
// Reused across frames so Vision can correlate observations between buffers.
private (set)var sequenceRequestHandler: VNSequenceRequestHandler!
/// Delivers per-face tracking info for the current frame, or an error.
/// NOTE(review): with several tracked faces this block can be invoked once
/// per landmarks request, i.e. more than once per processed frame.
typealias FaceHandlingCompletionBlock = (_ faceTrackingInfo: [VisionFaceTrackingInfo]?, _ error: VisionFaceTrackingError?) -> Void
init() {
prepareRequests() // create requests
}
/// Public entry point: extracts the pixel buffer from `sampleBuffer` and
/// runs the detection/tracking/landmarks pipeline.
/// - Parameters:
///   - sampleBuffer: Frame delivered by the capture output.
///   - exifOrientation: EXIF orientation of the frame. Currently not
///     forwarded to Vision — `handleFeedInner` deliberately passes `nil`
///     (see the `[SY]` comment there).
///   - completion: Receives tracking info or a `VisionFaceTrackingError`.
func handleFeed(sampleBuffer: CMSampleBuffer,
orientation exifOrientation: CGImagePropertyOrientation,
completion: @escaping FaceHandlingCompletionBlock) {
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
debugPrint("Failed to obtain a CVPixelBuffer for the current output frame.")
completion(nil, .receiveFrame)
return
}
// NOTE(review): the "orientaion" label below is a typo for "orientation";
// it is internal to this class so callers are unaffected.
handleFeedInner(pixelBuffer: pixelBuffer,
sampleBuffer: sampleBuffer,
orientaion: exifOrientation,
completion: completion)
}
/// Performs the frame-to-frame tracking `requests` on `pixelBuffer` using
/// the shared `sequenceRequestHandler`.
/// - Returns: `(false, nil)` when `requests` is nil/empty (nothing to
///   track), `(false, error)` when Vision threw, `(true, nil)` on success.
///   Callers rely on this distinction: `success == false` with no error
///   triggers initial detection instead.
/// NOTE(review): `requestHandlerOptions` is accepted but never used here.
private func handleSequence(requests: [VNTrackObjectRequest]?,
options requestHandlerOptions: [VNImageOption: AnyObject],
in pixelBuffer: CVImageBuffer,
exifOrientation: CGImagePropertyOrientation?) -> (success: Bool, error: Error?) {
guard let requests = requests, !requests.isEmpty else {
return (false, nil)
}
do {
// Only pass an orientation when the caller supplied one.
if let exifOrientation = exifOrientation {
try sequenceRequestHandler.perform(requests,
on: pixelBuffer,
orientation: exifOrientation)
} else {
try sequenceRequestHandler.perform(requests,
on: pixelBuffer)
}
} catch {
return (false, error)
}
return (true, nil)
}
/// Performs one-shot image-based `requests` (face detection or landmarks)
/// on `pixelBuffer` with a freshly created `VNImageRequestHandler`.
/// - Returns: Same tuple convention as `handleSequence`.
@discardableResult private func handle(requests: [VNImageBasedRequest]?,
options requestHandlerOptions: [VNImageOption: AnyObject],
in pixelBuffer: CVImageBuffer,
exifOrientation: CGImagePropertyOrientation?) -> (success: Bool, error: Error?) {
guard let requests = requests, !requests.isEmpty else {
return (false, nil)
}
// No tracking object detected, so perform initial detection
// NOTE(review): the force-unwrap of `exifOrientation!` is guarded by the
// ternary's nil check on the same value.
let imageRequestHandler = exifOrientation != nil ? VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
orientation: exifOrientation!,
options: requestHandlerOptions)
: VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
options: requestHandlerOptions)
do {
try imageRequestHandler.perform(requests)
} catch {
return (false, error)
}
return (true, nil)
}
/// Core pipeline: try tracking; if nothing is tracked yet, run detection;
/// otherwise refresh the tracking requests and run landmarks/blink
/// analysis on every still-tracked face.
private func handleFeedInner(pixelBuffer: CVImageBuffer,
sampleBuffer: CMSampleBuffer,
orientaion exifOrientation: CGImagePropertyOrientation,
completion: @escaping FaceHandlingCompletionBlock) {
var requestHandlerOptions = [VNImageOption: AnyObject]()
// Forward the camera intrinsic matrix to Vision when the frame carries it.
let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil)
if cameraIntrinsicData != nil {
requestHandlerOptions[.cameraIntrinsics] = cameraIntrinsicData
}
//requestHandlerOptions[.ciContext] = context
let orientation: CGImagePropertyOrientation? = nil //exifOrientation - [SY]: no point in orientation as it is detected from pixelBuffer...
let seqTuple = handleSequence(requests: trackingRequests,
options: requestHandlerOptions,
in: pixelBuffer,
exifOrientation: orientation)
if let error = seqTuple.error {
debugPrint("Failed to perform Sequence: %@", error)
completion(nil, .sequenceRequest(error: error))
return
} else if !seqTuple.success {
// No tracking object detected, so perform initial detection
// (its completion handler seeds `trackingRequests`; see prepareRequests).
let detectTuple = handle(requests: detectionRequests,
options: requestHandlerOptions,
in: pixelBuffer,
exifOrientation: orientation)
if let error = detectTuple.error {
debugPrint("Failed to perform FaceRectangleRequest: %@", error)
completion(nil, .detectFaceRequest(error: error))
}
// NOTE(review): on a successful detection no completion is called for
// this frame; results surface on subsequent frames via tracking.
return
}
// Setup the next round of tracking.
// The force-unwrap below is safe here: `seqTuple.success == true` implies
// `handleSequence` saw a non-nil, non-empty `trackingRequests`.
var newTrackingRequests = [VNTrackObjectRequest]()
for trackingRequest in trackingRequests! {
// NOTE(review): these `return`s abort processing of ALL remaining
// tracking requests for this frame (and skip the landmarks stage,
// leaving `trackingRequests` stale); `continue` may be the intent.
guard let results = trackingRequest.results else {
return
}
guard let observation = results.first as? VNDetectedObjectObservation else {
return
}
if !trackingRequest.isLastFrame {
// Keep tracking only while Vision is reasonably confident;
// otherwise let the request finish on this frame.
if observation.confidence > 0.3 {
trackingRequest.inputObservation = observation
} else {
trackingRequest.isLastFrame = true
}
newTrackingRequests.append(trackingRequest)
}
}
self.trackingRequests = newTrackingRequests
// Build one landmarks request per face still being tracked.
let observation = newTrackingRequests.compactMap { $0.results?.first as? VNDetectedObjectObservation }
let faceLandmarkRequests = observation.map { (observation) -> VNDetectFaceLandmarksRequest in
let faceLandmarksRequest = VNDetectFaceLandmarksRequest(completionHandler: { (request, error) in
if error != nil {
debugPrint("!! FaceLandmarks error: \(String(describing: error)).")
}
guard let landmarksRequest = request as? VNDetectFaceLandmarksRequest,
let results = landmarksRequest.results else {
return
}
// Map every landmarks result to a blink verdict for the caller.
let blinkings = results.map { VisionFaceTrackingInfo(boundingBox: $0.boundingBox,
blinking: Self.detectBlinking(faceObservation: $0 )) }
completion(blinkings, nil)
})
// Constrain the landmarks search to the tracked face's bounding box.
let faceObservation = VNFaceObservation(boundingBox: observation.boundingBox)
faceLandmarksRequest.inputFaceObservations = [faceObservation]
return faceLandmarksRequest
}
let landmarksTuple = handle(requests: faceLandmarkRequests,
options: requestHandlerOptions,
in: pixelBuffer,
exifOrientation: orientation)
if let error = landmarksTuple.error {
debugPrint("!!! Failed to perform FaceLandmarkRequest: %@", error)
completion(nil, .faceLandmarkRequest(error: error))
}
}
}
// MARK: - calculate blink
private extension VisionFaceTracker {
    /// Decides whether the eyes on `faceObservation` are closed ("blink").
    ///
    /// Averages the eye aspect ratio (EAR) of both eyes; a small average
    /// means the eyelids are nearly closed.
    /// - Parameter faceObservation: Result of a `VNDetectFaceLandmarksRequest`.
    /// - Returns: `true` when the averaged EAR falls below the blink threshold;
    ///   `false` for low-confidence faces or when an EAR cannot be computed.
    class func detectBlinking(faceObservation: VNFaceObservation) -> Bool {
        // Ignore low-confidence detections and faces without landmark data.
        guard faceObservation.confidence >= 0.5, let landmarks = faceObservation.landmarks else {
            debugPrint("!! No content!")
            return false
        }
        debugPrint("Bounding box: \(faceObservation.boundingBox)")
        guard let left = calculateEAR(using: landmarks.leftEye),
              let right = calculateEAR(using: landmarks.rightEye) else {
            debugPrint("!! No Values")
            return false
        }
        let averageEAR = 0.5 * (left + right)
        // Empirical threshold: a closed eye collapses the vertical distances,
        // driving the EAR toward zero.
        let blinked = averageEAR < 1e-1
        debugPrint("!! Value \(averageEAR) Blinked \(blinked)")
        return blinked
    }

    /// Squared Euclidean distance between two normalized landmark points.
    class func distanceSquared(from: CGPoint, to: CGPoint) -> CGFloat {
        let dx = from.x - to.x
        let dy = from.y - to.y
        return dx * dx + dy * dy
    }

    /// Euclidean distance between two normalized landmark points.
    class func distance(from: CGPoint, to: CGPoint) -> CGFloat {
        return sqrt(distanceSquared(from: from, to: to))
    }

    /// Computes the eye aspect ratio (EAR) for one eye region.
    ///
    /// EAR = (‖p1−p5‖ + ‖p2−p4‖) / (2·‖p0−p3‖) over the six eye landmarks.
    /// See https://www.pyimagesearch.com/2017/04/24/eye-blink-detection-opencv-python-dlib/
    /// - Parameters:
    ///   - eye: Eye landmark region; must contain exactly 6 points.
    ///   - boundingBox: Currently unused; kept for call-site compatibility.
    /// - Returns: The EAR, or `nil` when it cannot be computed (missing
    ///   region, unexpected point count, or a degenerate eye).
    class func calculateEAR(using eye: VNFaceLandmarkRegion2D?, boundingBox: CGRect? = nil) -> Float? {
        guard let eye = eye else {
            return nil
        }
        // Vision can deliver a different point count (e.g. 8 points on some
        // OS versions); bail out gracefully instead of crashing debug builds.
        guard eye.pointCount == 6 else {
            debugPrint("!! Unexpected number of eye points \(eye.pointCount)")
            return nil
        }
        debugPrint("!!! Points \(eye.normalizedPoints)")
        let points = eye.normalizedPoints
        let vertical1 = distance(from: points[1], to: points[5])
        let vertical2 = distance(from: points[2], to: points[4])
        let horizontal = distance(from: points[0], to: points[3])
        // Degenerate region: avoid division by zero (Inf/NaN would otherwise
        // poison the blink-threshold comparison).
        guard horizontal > 0 else {
            return nil
        }
        return Float(0.5 * (vertical1 + vertical2) / horizontal)
    }
}
// MARK: - Prepare
extension VisionFaceTracker {
    /// Creates the initial face-rectangle detection request and the sequence
    /// handler used for frame-to-frame tracking.
    ///
    /// The detection request's completion handler seeds `trackingRequests`
    /// with one `VNTrackObjectRequest` per detected face; subsequent frames
    /// then take the tracking path in `handleFeedInner`.
    private func prepareRequests() {
        // `[weak self]` breaks the retain cycle the original code had:
        // self -> detectionRequests -> request -> completion handler -> self.
        let faceDetectionRequest = VNDetectFaceRectanglesRequest(completionHandler: { [weak self] (request, error) in
            if error != nil {
                debugPrint("FaceDetection error: \(String(describing: error)).")
            }
            guard let self = self,
                  let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
                  let results = faceDetectionRequest.results else {
                return
            }
            // Track every freshly detected face from now on.
            self.trackingRequests = results.map { VNTrackObjectRequest(detectedObjectObservation: $0) }
        })
        // Start with detection. Find face, then track it.
        self.detectionRequests = [faceDetectionRequest]
        self.sequenceRequestHandler = VNSequenceRequestHandler()
    }
}
// MARK: - VisionFaceTrackingInfo
/// Per-face result handed to the feed-handling completion block.
struct VisionFaceTrackingInfo {
    /// Normalized bounding box of the tracked face within the frame.
    let boundingBox: CGRect
    /// Whether a blink was detected for this face on the current frame.
    let blinking: Bool
}
// MARK: - VisionFaceTrackingError
/// Failures the face-tracking pipeline can report to its caller.
enum VisionFaceTrackingError: Error {
    /// The sample buffer did not yield a pixel buffer.
    case receiveFrame
    /// The initial face-rectangle detection request failed.
    case detectFaceRequest(error: Error)
    /// The frame-to-frame tracking request failed.
    case sequenceRequest(error: Error)
    /// The face-landmarks request failed.
    case faceLandmarkRequest(error: Error)

    /// The underlying Vision error, when this case wraps one.
    var innerError: Error? {
        switch self {
        case .receiveFrame:
            return nil
        case let .detectFaceRequest(error),
             let .sequenceRequest(error),
             let .faceLandmarkRequest(error):
            return error
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment