MTLARFrameProcessor.swift
@wood1986, forked from ctreffs/MTLARFrameProcessor.swift
Processes ARKit's `ARFrame.capturedImage` CVPixelBuffer, per Apple's documentation, into an sRGB image.

import CoreImage
import CoreVideo
import ImageIO
import Metal

/// Processes ARKit's `ARFrame.capturedImage` CVPixelBuffer, per Apple's documentation, into an sRGB image.
///
/// ARKit captures pixel buffers in a full-range planar YCbCr (also known as YUV) format according to the ITU R. 601-4 standard.
/// (You can verify this by checking the `kCVImageBufferYCbCrMatrixKey` pixel buffer attachment.)
/// Unlike some uses of that standard, ARKit captures full-range color space values, not video-range values.
/// To render these images correctly on a device display, access the luma and chroma planes of the pixel buffer and
/// convert the full-range YCbCr values to sRGB (or ITU R. 709) according to the ITU-T T.871 specification.
/// <https://developer.apple.com/documentation/arkit/arframe/2867984-capturedimage>
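///
/// For reference, the conversion the compute kernel below encodes in matrix form
/// (ITU-T T.871 coefficients, rounded as in the kernel, with Y, Cb, and Cr
/// normalized to [0, 1]) is:
///
///     R = Y                       + 1.4020 * (Cr - 0.5)
///     G = Y - 0.3441 * (Cb - 0.5) - 0.7141 * (Cr - 0.5)
///     B = Y + 1.7720 * (Cb - 0.5)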
public final class MTLARFrameProcessor {
    public let ciContext: CIContext
    public let colorSpace: CGColorSpace

    let device: MTLDevice
    let queue: MTLCommandQueue
    let library: MTLLibrary
    let pipelineStateCompute: MTLComputePipelineState
    let textureCache: CVMetalTextureCache

    var colorRGBTexture: MTLTexture!
    var colorYContent: MTLTexture!
    var colorCbCrContent: MTLTexture!
    public init() throws {
        colorSpace = try eval(CGColorSpace(name: CGColorSpace.sRGB), orThrow: .failedToCreateColorSpace(CGColorSpace.sRGB))
        let device = try eval(MTLCreateSystemDefaultDevice(), orThrow: .failedToCreateMTLDevice)
        self.device = device
        self.ciContext = CIContext(mtlDevice: device)
        self.queue = try eval(device.makeCommandQueue(), orThrow: .failedToCreateMTLCommandQueue)

        // Compile the YCbCr-to-RGBA compute kernel from source at runtime.
        let library = try device.makeLibrary(source: Self.shader, options: nil)
        self.library = library
        let convertYUV2RGBFunc = try eval(library.makeFunction(name: "convertYCbCrToRGBA"), orThrow: .failedToMakeMTLFunction("convertYCbCrToRGBA"))
        pipelineStateCompute = try device.makeComputePipelineState(function: convertYUV2RGBFunc)

        // The texture cache lets the camera pixel buffer planes be wrapped as Metal textures without copying.
        self.textureCache = try eval({
            var textureCache: CVMetalTextureCache?
            CVMetalTextureCacheCreate(nil, nil, device, nil, &textureCache)
            return textureCache
        }, orThrow: .failedToCreateCVMetalTextureCache)
    }
    public func convertToCIImage(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation) throws -> CIImage {
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)

        // Destination texture for the converted RGB image.
        colorRGBTexture = try Self.createTexture(
            metalDevice: device,
            width: width,
            height: height,
            usage: [.shaderRead, .shaderWrite],
            pixelFormat: .rgba32Float
        )

        // Wrap the luma (plane 0) and interleaved chroma (plane 1) planes of
        // the captured image as Metal textures.
        colorYContent = try pixelBuffer.texture(withFormat: .r8Unorm, planeIndex: 0, addToCache: textureCache)
        colorCbCrContent = try pixelBuffer.texture(withFormat: .rg8Unorm, planeIndex: 1, addToCache: textureCache)

        let cmdBuffer = try eval(self.queue.makeCommandBuffer(), orThrow: .failedToMakeMTLCommandBuffer)
        let computeEncoder = try eval(cmdBuffer.makeComputeCommandEncoder(), orThrow: .failedToMakeComputeCommandEncoder)

        // Encode the YCbCr-to-RGB conversion kernel.
        computeEncoder.setComputePipelineState(pipelineStateCompute)
        computeEncoder.setTexture(colorYContent, index: 0)
        computeEncoder.setTexture(colorCbCrContent, index: 1)
        computeEncoder.setTexture(colorRGBTexture, index: 2)

        // Fill each threadgroup with as many threads as the pipeline allows,
        // then dispatch enough threadgroups to cover the whole texture.
        let threadgroupSize = MTLSizeMake(
            pipelineStateCompute.threadExecutionWidth,
            pipelineStateCompute.maxTotalThreadsPerThreadgroup / pipelineStateCompute.threadExecutionWidth,
            1
        )
        let threadgroupCount = MTLSize(
            width: Int(ceil(Float(colorRGBTexture.width) / Float(threadgroupSize.width))),
            height: Int(ceil(Float(colorRGBTexture.height) / Float(threadgroupSize.height))),
            depth: 1
        )
        computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
        computeEncoder.endEncoding()
        cmdBuffer.commit()
        cmdBuffer.waitUntilCompleted()

        let ciImageOptions: [CIImageOption: Any] = [
            .colorSpace: colorSpace,
            .applyOrientationProperty: true
        ]
        var ciImage = try eval(CIImage(mtlTexture: self.colorRGBTexture, options: ciImageOptions), orThrow: .failedToCreateCIImageFromMTLTexture)
        ciImage = ciImage.oriented(orientation)
        return ciImage
    }
    public func convertToJPEG(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, compressionLevel: Float = 1.0) throws -> Data {
        let ciImage = try convertToCIImage(capturedImage: pixelBuffer, orientation: orientation)
        // Maps to kCGImageDestinationLossyCompressionQuality: 1.0 is maximum quality, 0.0 is maximum compression.
        let options: [CIImageRepresentationOption: Any] = [
            CIImageRepresentationOption(rawValue: kCGImageDestinationLossyCompressionQuality as String): compressionLevel
        ]
        return try eval(self.ciContext.jpegRepresentation(of: ciImage, colorSpace: self.colorSpace, options: options), orThrow: .failedToCreateJPEGRepresentation)
    }

    public func convertToPNG(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation) throws -> Data {
        let ciImage = try convertToCIImage(capturedImage: pixelBuffer, orientation: orientation)
        return try eval(self.ciContext.pngRepresentation(of: ciImage, format: .RGBA8, colorSpace: self.colorSpace), orThrow: .failedToCreatePNGRepresentation)
    }
}

// MARK: - errors

extension MTLARFrameProcessor {
    enum Error: Swift.Error {
        case failedToCreateMTLDevice
        case failedToCreateMTLCommandQueue
        case failedToMakeMTLFunction(String)
        case failedToMakeMTLTexture(MTLTextureDescriptor)
        case failedToCreateMTLTextureFromCVPixelBuffer
        case failedToCreateCVMetalTextureCache
        case failedToMakeMTLCommandBuffer
        case failedToMakeComputeCommandEncoder
        case failedToCreateCIImageFromMTLTexture
        case failedToCreateCGImageFromCIImage
        case failedToCreateColorSpace(CFString)
        case failedToCreateJPEGRepresentation
        case failedToCreatePNGRepresentation
    }
}

// MARK: - shader

extension MTLARFrameProcessor {
    /// Adapted from the shader in Apple's sample projects:
    /// <https://developer.apple.com/documentation/arkit/arkit_in_ios/environmental_analysis/displaying_a_point_cloud_using_scene_depth>
    /// <https://developer.apple.com/documentation/arkit/arkit_in_ios/displaying_an_ar_experience_with_metal>
    static let shader: String = """
    #include <metal_stdlib>
    using namespace metal;

    // Convert the Y and CbCr textures into a single RGBA texture.
    kernel void convertYCbCrToRGBA(texture2d<float, access::read> colorYtexture [[texture(0)]],
                                   texture2d<float, access::read> colorCbCrtexture [[texture(1)]],
                                   texture2d<float, access::write> colorRGBTexture [[texture(2)]],
                                   uint2 gid [[thread_position_in_grid]])
    {
        // Sample the Y and CbCr textures to get the YCbCr color at the given
        // texture coordinate. The chroma plane is half resolution (4:2:0), so
        // halve the grid position to index it.
        float y = colorYtexture.read(gid).r;
        float2 uv = colorCbCrtexture.read(gid / 2).rg;

        // Column-major full-range BT.601 conversion matrix; the fourth column
        // folds in the -0.5 offsets that recenter the chroma channels.
        const float4x4 ycbcrToRGBTransform = float4x4(
            float4(+1.0000f, +1.0000f, +1.0000f, +0.0000f),
            float4(+0.0000f, -0.3441f, +1.7720f, +0.0000f),
            float4(+1.4020f, -0.7141f, +0.0000f, +0.0000f),
            float4(-0.7010f, +0.5291f, -0.8860f, +1.0000f)
        );

        float4 ycbcr = float4(y, uv.x, uv.y, 1.0f);

        // Write the converted RGB color.
        float4 colorSample = ycbcrToRGBTransform * ycbcr;
        colorRGBTexture.write(colorSample, gid);
    }
    """
}
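
// MARK: - CPU reference (illustrative)

// A minimal CPU-side sketch of the same full-range conversion the kernel above
// performs, handy for spot-checking single pixels in unit tests. This helper is
// not part of the original gist; its name and the use of SIMD3 are assumptions
// made for the sketch.
func referenceYCbCrToRGB(y: Float, cb: Float, cr: Float) -> SIMD3<Float> {
    // Inputs are normalized to [0, 1]; subtracting 0.5 recenters the chroma
    // channels, matching the offsets baked into the kernel's fourth matrix column.
    SIMD3<Float>(
        y + 1.4020 * (cr - 0.5),
        y - 0.3441 * (cb - 0.5) - 0.7141 * (cr - 0.5),
        y + 1.7720 * (cb - 0.5)
    )
}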

// MARK: - texture helper

extension MTLARFrameProcessor {
    /// Create an empty texture with the given size, usage, and pixel format.
    static func createTexture(metalDevice: MTLDevice, width: Int, height: Int, usage: MTLTextureUsage, pixelFormat: MTLPixelFormat) throws -> MTLTexture {
        let descriptor = MTLTextureDescriptor()
        descriptor.pixelFormat = pixelFormat
        descriptor.width = width
        descriptor.height = height
        descriptor.usage = usage
        return try eval(metalDevice.makeTexture(descriptor: descriptor), orThrow: .failedToMakeMTLTexture(descriptor))
    }
}

// MARK: - pixel buffer helper

/// Enable `CVPixelBuffer` to output an `MTLTexture` for one of its planes.
extension CVPixelBuffer {
    func texture(withFormat pixelFormat: MTLPixelFormat, planeIndex: Int, addToCache cache: CVMetalTextureCache) throws -> MTLTexture {
        assert(CVPixelBufferGetIOSurface(self) != nil, "CVPixelBuffer must be backed by an IOSurface")
        let width = CVPixelBufferGetWidthOfPlane(self, planeIndex)
        let height = CVPixelBufferGetHeightOfPlane(self, planeIndex)

        var cvtexture: CVMetalTexture?
        let status = CVMetalTextureCacheCreateTextureFromImage(nil, cache, self, nil, pixelFormat, width, height, planeIndex, &cvtexture)
        guard
            status == kCVReturnSuccess,
            let cvtexture,
            let texture = CVMetalTextureGetTexture(cvtexture)
        else {
            throw MTLARFrameProcessor.Error.failedToCreateMTLTextureFromCVPixelBuffer
        }
        return texture
    }
}

// MARK: - eval helper

private func eval<R>(_ block: @autoclosure @escaping () -> R?, orThrow error: MTLARFrameProcessor.Error) throws -> R {
    try evaluate(block, orThrow: error)
}

private func eval<R>(_ block: @escaping () -> R?, orThrow error: MTLARFrameProcessor.Error) throws -> R {
    try evaluate(block, orThrow: error)
}

private func evaluate<R, E: Swift.Error>(_ block: @escaping () -> R?, orThrow error: E) throws -> R {
    guard let result: R = block() else {
        throw error
    }
    return result
}