Created
February 14, 2024 10:42
-
-
Save vlastachu/70e7140feb655872c1c4938dba70bcd7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import SwiftUI | |
import AVFoundation | |
import CoreML | |
import Vision | |
/// Shows the live front-camera feed with the segmentation mask overlaid.
struct ContentView: View {
    // @StateObject (not @ObservedObject): an inline-initialized
    // @ObservedObject is re-created every time SwiftUI rebuilds this view
    // struct, which would tear down and restart the capture session.
    // @StateObject makes the view the owner for the view's whole lifetime.
    @StateObject private var cameraManager = CameraManager()

    var body: some View {
        VStack {
            if let mask = cameraManager.processedImage {
                ZStack {
                    // Raw camera frame underneath the mask.
                    if let original = cameraManager.currentFrame {
                        Image(uiImage: original)
                            .resizable()
                            .aspectRatio(contentMode: .fit)
                            // Front-camera buffers arrive rotated and
                            // mirrored; compensate for a natural preview.
                            .rotationEffect(.degrees(90))
                            .scaleEffect(x: -1, y: 1)
                    }
                    // Apply the modifiers in the SAME order as the frame
                    // above — the original code put .aspectRatio last on the
                    // mask only, so the two layers laid out differently and
                    // the mask did not line up with the frame.
                    Image(uiImage: mask)
                        .resizable()
                        .aspectRatio(contentMode: .fit)
                        .rotationEffect(.degrees(90))
                        .scaleEffect(x: -1, y: 1)
                }
            } else {
                Text("Waiting for camera feed...")
            }
        }
        .onAppear { cameraManager.startSession() }
        .onDisappear { cameraManager.stopSession() }
    }
}
/// Owns the AVFoundation capture pipeline for the front camera and runs
/// DeepLab segmentation on every delivered frame, publishing both the raw
/// frame and the rendered mask for SwiftUI consumption.
class CameraManager: NSObject, ObservableObject, AVCaptureVideoDataOutputSampleBufferDelegate {
    /// White-on-transparent mask rendered from the latest model output.
    @Published var processedImage: UIImage?
    /// Most recent unmodified camera frame.
    @Published var currentFrame: UIImage?

    private var captureSession: AVCaptureSession?
    private var videoOutput: AVCaptureVideoDataOutput?
    // Force-try is acceptable here: the model is compiled into the app
    // bundle, so a load failure is a programmer error worth crashing on.
    private let model = try! VNCoreMLModel(for: DeepLabV3FP16(configuration: MLModelConfiguration()).model)

    override init() {
        super.init()
        setupCamera()
    }

    // MARK: - Session setup

    /// Builds a capture session on the front wide-angle camera that delivers
    /// BGRA sample buffers to `captureOutput` on a private serial queue.
    private func setupCamera() {
        let session = AVCaptureSession()
        session.beginConfiguration()
        guard let videoDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front),
              let videoInput = try? AVCaptureDeviceInput(device: videoDevice) else {
            // No camera (e.g. Simulator) — leave the session unconfigured.
            return
        }
        if session.canAddInput(videoInput) {
            session.addInput(videoInput)
        }
        let videoOutput = AVCaptureVideoDataOutput()
        // Drop frames the (slow) ML pipeline cannot keep up with instead of
        // queueing progressively staler buffers.
        videoOutput.alwaysDiscardsLateVideoFrames = true
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoQueue"))
        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }
        session.commitConfiguration()
        self.captureSession = session
        self.videoOutput = videoOutput
    }

    // MARK: - Session control

    /// Starts capture off the main thread (`startRunning()` blocks).
    func startSession() {
        DispatchQueue.global(qos: .userInitiated).async {
            self.captureSession?.startRunning()
        }
    }

    /// Stops capture off the main thread (`stopRunning()` blocks).
    func stopSession() {
        DispatchQueue.global(qos: .userInitiated).async {
            self.captureSession?.stopRunning()
        }
    }

    // MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // This delegate runs on the background videoQueue. @Published
        // properties drive SwiftUI and must be mutated on the main thread —
        // the original code set `currentFrame` directly from this queue.
        if let frame = imageFromSampleBuffer(sampleBuffer: sampleBuffer) {
            DispatchQueue.main.async { self.currentFrame = frame }
        }
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }
        // [weak self]: the completion is retained by the Vision request; do
        // not let an in-flight frame keep the manager alive after teardown.
        let request = VNCoreMLRequest(model: model) { [weak self] request, error in
            guard let self = self else { return }
            guard error == nil,
                  let results = request.results as? [VNCoreMLFeatureValueObservation],
                  let multiArray = results.first?.featureValue.multiArrayValue else {
                print("Failed to perform segmentation:", error?.localizedDescription ?? "Unknown error")
                return
            }
            if let maskImage = self.createMaskImage(from: multiArray, using: pixelBuffer) {
                DispatchQueue.main.async {
                    self.processedImage = maskImage
                }
            }
        }
        do {
            try VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]).perform([request])
        } catch {
            // Don't silently swallow Vision failures (was `try?`).
            print("Vision request failed:", error.localizedDescription)
        }
    }

    // MARK: - Image conversion

    /// Converts a sample buffer's image buffer to a UIImage via Core Image.
    /// Returns nil when the buffer carries no image data.
    private func imageFromSampleBuffer(sampleBuffer: CMSampleBuffer) -> UIImage? {
        guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return nil
        }
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)
        let context = CIContext()
        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
            return nil
        }
        return UIImage(cgImage: cgImage)
    }

    /// Renders the model's per-pixel class-label grid into a mask image:
    /// every non-background pixel (label != 0) becomes opaque white, the
    /// rest stay fully transparent.
    func createMaskImage(from multiArray: MLMultiArray, using originalPixelBuffer: CVPixelBuffer) -> UIImage? {
        // Guard the rank before indexing shape[1] (the original code would
        // trap on a 1-D output). Assumes shape[0] = rows, shape[1] = columns
        // as DeepLab-style models emit — TODO confirm against the model spec.
        guard multiArray.shape.count >= 2 else { return nil }
        let height = multiArray.shape[0].intValue
        let width = multiArray.shape[1].intValue
        let bytesPerRow = width * 4 // RGBA, 8 bits per component
        var pixelData = [UInt8](repeating: 0, count: width * height * 4)
        for y in 0..<height {
            for x in 0..<width {
                let offset = (y * width + x) * 4
                let classLabelIndex = UInt8(truncating: multiArray[y * width + x])
                if classLabelIndex != 0 {
                    pixelData[offset] = 255     // Red
                    pixelData[offset + 1] = 255 // Green
                    pixelData[offset + 2] = 255 // Blue
                    pixelData[offset + 3] = 255 // Alpha
                }
                // else: buffer is zero-initialized, i.e. already transparent.
            }
        }
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue)
        // Keep the CGContext's backing pointer valid for the whole
        // makeImage() call: passing `&pixelData` (as the original did) only
        // pins the array's storage for the duration of the init call itself.
        let cgImage: CGImage? = pixelData.withUnsafeMutableBytes { raw in
            guard let context = CGContext(data: raw.baseAddress,
                                          width: width,
                                          height: height,
                                          bitsPerComponent: 8,
                                          bytesPerRow: bytesPerRow,
                                          space: colorSpace,
                                          bitmapInfo: bitmapInfo.rawValue) else { return nil }
            return context.makeImage()
        }
        guard let cgImage = cgImage else { return nil }
        return UIImage(cgImage: cgImage)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment