@vlastachu
Created February 14, 2024 10:42
// Live front-camera preview with a DeepLabV3 segmentation mask overlaid on top.
import SwiftUI
import AVFoundation
import CoreML
import Vision

struct ContentView: View {
    // The view creates and owns the camera manager, so use @StateObject
    // (with @ObservedObject the manager could be recreated on view updates).
    @StateObject private var cameraManager = CameraManager()

    var body: some View {
        VStack {
            if let image = cameraManager.processedImage {
                ZStack {
                    // Original camera frame underneath the mask, rotated and
                    // mirrored to match the front-camera orientation.
                    if let original = cameraManager.currentFrame {
                        Image(uiImage: original)
                            .resizable()
                            .aspectRatio(contentMode: .fit)
                            .rotationEffect(.degrees(90))
                            .scaleEffect(x: -1, y: 1)
                    }
                    // Segmentation mask overlaid with the same transform.
                    Image(uiImage: image)
                        .resizable()
                        .rotationEffect(.degrees(90))
                        .scaleEffect(x: -1, y: 1)
                        .aspectRatio(contentMode: .fit)
                }
            } else {
                Text("Waiting for camera feed...")
            }
        }
        .onAppear {
            cameraManager.startSession()
        }
        .onDisappear {
            cameraManager.stopSession()
        }
    }
}
class CameraManager: NSObject, ObservableObject, AVCaptureVideoDataOutputSampleBufferDelegate {
    @Published var processedImage: UIImage?
    @Published var currentFrame: UIImage?

    private var captureSession: AVCaptureSession?
    private var videoOutput: AVCaptureVideoDataOutput?
    // DeepLabV3FP16 is the Core ML segmentation model added to the Xcode project.
    private let model = try! VNCoreMLModel(for: DeepLabV3FP16(configuration: MLModelConfiguration()).model)

    override init() {
        super.init()
        self.setupCamera()
    }

    private func setupCamera() {
        let session = AVCaptureSession()
        session.beginConfiguration()

        // Use the front wide-angle camera as the video input.
        guard let videoDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front),
              let videoInput = try? AVCaptureDeviceInput(device: videoDevice) else {
            return
        }
        if session.canAddInput(videoInput) {
            session.addInput(videoInput)
        }

        // Deliver frames to the delegate on a dedicated serial queue.
        let videoOutput = AVCaptureVideoDataOutput()
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoQueue"))
        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }

        session.commitConfiguration()
        self.captureSession = session
        self.videoOutput = videoOutput
    }

    func startSession() {
        // startRunning() blocks, so keep it off the main thread.
        DispatchQueue.global(qos: .userInitiated).async {
            self.captureSession?.startRunning()
        }
    }

    func stopSession() {
        DispatchQueue.global(qos: .userInitiated).async {
            self.captureSession?.stopRunning()
        }
    }

    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // This delegate runs on the video queue; publish UI state on the main thread.
        let frame = imageFromSampleBuffer(sampleBuffer: sampleBuffer)
        DispatchQueue.main.async {
            self.currentFrame = frame
        }

        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }

        let request = VNCoreMLRequest(model: model) { request, error in
            guard error == nil,
                  let results = request.results as? [VNCoreMLFeatureValueObservation],
                  let multiArray = results.first?.featureValue.multiArrayValue else {
                print("Failed to perform segmentation:", error?.localizedDescription ?? "Unknown error")
                return
            }
            if let maskImage = self.createMaskImage(from: multiArray, using: pixelBuffer) {
                DispatchQueue.main.async {
                    self.processedImage = maskImage
                }
            }
        }
        try? VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]).perform([request])
    }
    private func imageFromSampleBuffer(sampleBuffer: CMSampleBuffer) -> UIImage? {
        guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return nil
        }
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)
        let context = CIContext()
        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
            return nil
        }
        return UIImage(cgImage: cgImage)
    }

    func createMaskImage(from multiArray: MLMultiArray, using originalPixelBuffer: CVPixelBuffer) -> UIImage? {
        // DeepLabV3 outputs a 2D array of class labels (shape: height x width).
        guard multiArray.shape.count >= 2 else { return nil }
        let height = multiArray.shape[0].intValue
        let width = multiArray.shape[1].intValue
        let bytesPerRow = width * 4
        var pixelData = [UInt8](repeating: 0, count: width * height * 4)

        for y in 0..<height {
            for x in 0..<width {
                let offset = (y * width + x) * 4
                // Class index 0 is the background class in the DeepLabV3 label map.
                let classLabelIndex = UInt8(truncating: multiArray[y * width + x])
                if classLabelIndex != 0 {
                    // Opaque white pixel for any non-background class.
                    pixelData[offset] = 255     // Red
                    pixelData[offset + 1] = 255 // Green
                    pixelData[offset + 2] = 255 // Blue
                    pixelData[offset + 3] = 255 // Alpha
                } else {
                    // Transparent pixel for the background class.
                    pixelData[offset + 3] = 0   // Alpha
                }
            }
        }

        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue)
        // Keep the pointer to pixelData valid for the lifetime of the CGContext.
        return pixelData.withUnsafeMutableBytes { buffer -> UIImage? in
            guard let context = CGContext(data: buffer.baseAddress,
                                          width: width,
                                          height: height,
                                          bitsPerComponent: 8,
                                          bytesPerRow: bytesPerRow,
                                          space: colorSpace,
                                          bitmapInfo: bitmapInfo.rawValue),
                  let cgImage = context.makeImage() else { return nil }
            return UIImage(cgImage: cgImage)
        }
    }
}
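
Note: running this requires the DeepLabV3FP16 Core ML model to be added to the Xcode project and an NSCameraUsageDescription entry in the app's Info.plist; without the latter the capture session will not deliver frames. Below is a minimal sketch of gating startSession() on camera authorization; the requestCameraAccess helper is not part of the original gist.

import AVFoundation

// Hypothetical helper (not in the gist): asks for camera access before the
// capture session is started.
func requestCameraAccess(_ completion: @escaping (Bool) -> Void) {
    switch AVCaptureDevice.authorizationStatus(for: .video) {
    case .authorized:
        completion(true)
    case .notDetermined:
        AVCaptureDevice.requestAccess(for: .video) { granted in
            DispatchQueue.main.async { completion(granted) }
        }
    default:
        completion(false)
    }
}

// Possible usage in ContentView.onAppear instead of calling startSession() directly:
// requestCameraAccess { granted in
//     if granted { cameraManager.startSession() }
// }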