Skip to content

Instantly share code, notes, and snippets.

@JoshuaSullivan
Created October 1, 2017 01:38
Show Gist options
  • Save JoshuaSullivan/765ba8cfb03ca7bbf14ef68731872750 to your computer and use it in GitHub Desktop.
Save JoshuaSullivan/765ba8cfb03ca7bbf14ef68731872750 to your computer and use it in GitHub Desktop.
The source code for an object that helps you sample RGB values from ARFrames.
//
// CapturedImageSampler.swift
// ARKitTest
//
// Created by Joshua Sullivan on 9/22/17.
// Copyright © 2017 Joshua Sullivan. All rights reserved.
//
import UIKit
import ARKit
import Accelerate
/// This object provides easy color sampling of the `capturedImage` property of an ARFrame.
///
/// Each instance converts the frame's bi-planar YCbCr image into an interleaved ARGB8888
/// buffer once, at initialization, and then answers `getColor(atX:y:)` queries from it.
///
/// - Warning: This class is NOT thread safe. The `rawRGBBuffer` property is shared between instances
///   and will cause a lot of headaches if 2 instances try to simultaneously access it.
///   Because every instance reads from that shared buffer, creating a new sampler overwrites
///   (or, if the frame size changed, frees) the pixels an older sampler is still pointing at;
///   only the most recently created instance should be used.
///   If you need multi-threading, make the shared buffer an instance property instead.
///   Just remember to release it when you're done with it.
class CapturedImageSampler {

    /// This is the format of the pixel buffer included with the ARFrame.
    private static let expectedPixelFormat: OSType = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange

    /// This is the YCbCr to RGB conversion opaque object used by the convert function.
    private static var conversionMatrix: vImage_YpCbCrToARGB = {
        var pixelRange = vImage_YpCbCrPixelRange(Yp_bias: 0, CbCr_bias: 128, YpRangeMax: 255, CbCrRangeMax: 255, YpMax: 255, YpMin: 1, CbCrMax: 255, CbCrMin: 0)
        var matrix = vImage_YpCbCrToARGB()
        vImageConvert_YpCbCrToARGB_GenerateConversion(kvImage_YpCbCrToARGBMatrix_ITU_R_709_2, &pixelRange, &matrix, kvImage420Yp8_CbCr8, kvImageARGB8888, UInt32(kvImageNoFlags))
        return matrix
    }()

    /// Since we'll generally be dealing with buffers of the same size, save processing power
    /// by re-using a single, static one, rather than allocating a new buffer each time. This
    /// will be set by the initializer of the first instance of `CapturedImageSampler`.
    private static var rawRGBBuffer: UnsafeMutableRawPointer!

    /// The pixel dimensions that `rawRGBBuffer` was allocated for. Updated whenever the buffer
    /// is (re)allocated and reset to `.zero` when it is freed.
    private static var rawBufferSize: CGSize = .zero

    /// The errors which can be produced.
    enum PixelError: Error {
        /// The `capturedImage` property's CVPixelBuffer was in an unexpected format.
        case incorrectPixelFormat
        /// Failed to allocate space for the conversion buffer.
        case systemFailure
        /// The conversion function returned an error.
        case conversionFailure(vImage_Error)
    }

    /// A private pointer to a byte-typed view of `CapturedImageSampler.rawRGBBuffer`.
    private let rgb: UnsafePointer<UInt8>

    /// Stored buffer dimension information.
    private let rgbSize: BufferDimension

    /// Initialize the CapturedImageSampler with an `ARFrame` to sample RGB colors from the
    /// captured image that it contains.
    ///
    /// - Parameter frame: An `ARFrame` instance that you wish to sample for RGB colors.
    /// - Throws: `PixelError.incorrectPixelFormat` if the captured image is not in the expected
    ///   bi-planar YCbCr format or its planes cannot be read, `PixelError.systemFailure` if the
    ///   RGB buffer cannot be allocated, and `PixelError.conversionFailure` if vImage reports an
    ///   error during conversion.
    init(frame: ARFrame) throws {
        // Get the image pixel buffer.
        let pixelBuffer = frame.capturedImage

        // Double-check that the format hasn't changed unexpectedly.
        guard CVPixelBufferGetPixelFormatType(pixelBuffer) == CapturedImageSampler.expectedPixelFormat else {
            NSLog("ERROR: ARFrame.capturedImage had an unexpected pixel format.")
            throw PixelError.incorrectPixelFormat
        }

        // Lock the base address for work. The matching unlock is deferred so that it also runs
        // on every error path below, not only when the conversion succeeds.
        CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

        // Ensure that we can find both the Y and the CbCr planes.
        guard
            let rawyBuffer = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0),
            let rawcbcrBuffer = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1)
        else {
            print("Unable to find correct planar data in source pixel buffer.")
            throw PixelError.incorrectPixelFormat
        }

        // Note: The Y plane has the same size as our output image. The CbCr plane is 1/2 resolution.
        // However, the conversion method accounts for this, so we don't need to worry.
        let ySize = BufferDimension(pixelBuffer: pixelBuffer, plane: 0)
        let cSize = BufferDimension(pixelBuffer: pixelBuffer, plane: 1)

        // Convert the individual planes to the vImage_Buffer type via a convenience method on the size.
        var yBuffer = ySize.buffer(with: rawyBuffer)
        var cbcrBuffer = cSize.buffer(with: rawcbcrBuffer)

        // If a static RGB buffer exists but was allocated for a different image size, free it
        // and clear the bookkeeping so it is recreated below.
        if let stale = CapturedImageSampler.rawRGBBuffer, ySize.size != CapturedImageSampler.rawBufferSize {
            free(stale)
            CapturedImageSampler.rawRGBBuffer = nil
            CapturedImageSampler.rawBufferSize = .zero
        }

        // Re-use the static buffer when it exists; otherwise allocate it.
        let rgbStorage: UnsafeMutableRawPointer
        if let existing = CapturedImageSampler.rawRGBBuffer {
            rgbStorage = existing
        } else {
            // Size = width * height * 4 channels (ARGB) at 1 byte per channel.
            guard let fresh = malloc(ySize.width * ySize.height * 4) else {
                print("ERROR: Unable to allocate space for RGB buffer.")
                throw PixelError.systemFailure
            }
            CapturedImageSampler.rawRGBBuffer = fresh
            // Record the size this buffer was made for; without this the size check above can
            // never match and the buffer would be reallocated for every frame.
            CapturedImageSampler.rawBufferSize = ySize.size
            rgbStorage = fresh
        }

        // Use the static buffer to create the target RGB vImage_Buffer.
        var rgbBuffer = vImage_Buffer(data: rgbStorage, height: ySize.uHeight, width: ySize.uWidth, rowBytes: ySize.width * 4)

        // Put everything together to convert the Y and CbCr planes into a single, interleaved ARGB buffer.
        // Note: The declared constants for kvImageFlags are the wrong type: they're all Int, but should be UInt32.
        let error = vImageConvert_420Yp8_CbCr8ToARGB8888(&yBuffer, &cbcrBuffer, &rgbBuffer, &CapturedImageSampler.conversionMatrix, nil, 255, UInt32(kvImageNoFlags))

        // Check to see that the returned error type was No Error.
        if error != kvImageNoError {
            NSLog("Error converting buffer: \(error)")
            throw PixelError.conversionFailure(error)
        }

        // All we want is a buffer of bytes, we'll do the address math manually.
        rgb = UnsafePointer(rgbStorage.assumingMemoryBound(to: UInt8.self))

        // Store the dimensions of the converted buffer so we can do offset math when sampling.
        rgbSize = BufferDimension(width: ySize.width, height: ySize.height, bytesPerRow: ySize.width * 4)
    }

    /// Get the RGB color of the pixel at the specified coordinates.
    ///
    /// - Parameters:
    ///   - x: A scalar float in the range 0.0..<1.0.
    ///   - y: A scalar float in the range 0.0..<1.0.
    /// - Returns: An optional UIColor, based on whether or not valid coordinates were supplied.
    ///
    /// - Note: The scalar values for x and y are easily obtained by dividing the x and y of your
    ///   target point by the width and height of the image, respectively. This is done so
    ///   that there is agreement between the sample and the displayed image, even if the
    ///   latter is scaled up or down.
    func getColor(atX x: CGFloat, y: CGFloat) -> UIColor? {
        guard x >= 0 && x < 1 && y >= 0 && y < 1 else {
            // The coordinate is outside the valid range.
            return nil
        }

        // The buffer order is horizontally flipped from the displayed orientation, so we'll
        // flip the x coordinate. Make sure it never equals 1.0.
        let xFlipped = min(1 - x, 0.9999999)

        // The ARFrame's pixel buffer has the long axis horizontally, so we're going to swap the
        // x and y coordinates to ensure we get the color corresponding to the correct screen
        // location when the phone is in portrait orientation.
        let tx = Int(y * CGFloat(rgbSize.width))
        let ty = Int(xFlipped * CGFloat(rgbSize.height))

        // Calculate the starting index of the 4-byte ARGB pixel.
        let index = ty * rgbSize.bytesPerRow + tx * 4

        // Grab the ARGB bytes and convert them into scalar CGFloats.
        let a = CGFloat(rgb[index]) / 255.0
        let r = CGFloat(rgb[index + 1]) / 255.0
        let g = CGFloat(rgb[index + 2]) / 255.0
        let b = CGFloat(rgb[index + 3]) / 255.0

        // Return the resulting color.
        return UIColor(red: r, green: g, blue: b, alpha: a)
    }

    /// This helper method prints out a bunch of information about a `CVPixelBuffer`. Call this
    /// to see what is going on if you're having an incorrect pixel format error.
    ///
    /// - Parameter pixelBuffer: The pixel buffer to describe.
    static func inspect(pixelBuffer: CVPixelBuffer) {
        print("Beginning pixel buffer inspection.")
        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
        print("Raw pixel format: \(pixelFormat)")
        let pixelFormatString = str(from: pixelFormat)
        print("Pixel format: \(pixelFormatString)")
        let isPlanar = CVPixelBufferIsPlanar(pixelBuffer)
        print("Is planar: \(isPlanar)")
        if isPlanar {
            // Plane base addresses are read below, so hold a read-only lock while doing so.
            CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
            defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

            let planeCount = CVPixelBufferGetPlaneCount(pixelBuffer)
            print("Plane count: \(planeCount)")
            for planeIndex in 0..<planeCount {
                print("[Plane \(planeIndex)]")
                if let baseAddress = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, planeIndex) {
                    print("baseAddress: \(baseAddress)")
                }
                let planeWidth = CVPixelBufferGetWidthOfPlane(pixelBuffer, planeIndex)
                let planeHeight = CVPixelBufferGetHeightOfPlane(pixelBuffer, planeIndex)
                let planeRowSize = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, planeIndex)
                print("width: \(planeWidth) / height: \(planeHeight) / rowSize: \(planeRowSize)")
            }
        } else {
            let width = CVPixelBufferGetWidth(pixelBuffer)
            let height = CVPixelBufferGetHeight(pixelBuffer)
            let rowSize = CVPixelBufferGetBytesPerRow(pixelBuffer)
            print("width: \(width) / height: \(height) / rowSize: \(rowSize)")
        }
    }

    /// Get a human-readable string from an OSType object.
    ///
    /// The four bytes of the value are read from most significant to least significant and
    /// each is turned into one Unicode scalar of the result.
    ///
    /// - Parameter os: The four-character code to decode.
    /// - Returns: A string of four Unicode scalars, one per byte of `os`.
    static func str(from os: OSType) -> String {
        var str = ""
        for shift in [24, 16, 8, 0] as [OSType] {
            // A single byte is always a valid scalar, so the non-failable UInt8 initializer
            // is used instead of force-unwrapping the failable UInt32 one.
            str.unicodeScalars.append(UnicodeScalar(UInt8(truncatingIfNeeded: os >> shift)))
        }
        return str
    }
}
/// This struct stores dimensional information about a buffer, needed to correctly address its
/// constituent elements.
struct BufferDimension {
    /// Width of the buffer, in pixels.
    let width: Int
    /// Height of the buffer, in rows.
    let height: Int
    /// Number of bytes from the start of one row to the start of the next.
    let bytesPerRow: Int

    /// The total number of bytes in a buffer with this dimension. Useful for bounds-checking
    /// array access of the buffer's elements.
    var byteCount: Int {
        // `bytesPerRow` already spans a full row, so the total is rows * bytes-per-row;
        // multiplying by `width` as well would over-count by a factor of `width`.
        return bytesPerRow * height
    }

    /// Convenience getter to cast the width as a UInt, which is required by vImage_Buffer.
    var uWidth: UInt {
        return UInt(width)
    }

    /// Convenience getter to cast the height as a UInt, which is required by vImage_Buffer.
    var uHeight: UInt {
        return UInt(height)
    }

    /// Convenience getter to get the size (width, height) of the buffer.
    var size: CGSize {
        return CGSize(width: width, height: height)
    }

    /// Initialize the dimension with a CVPixelBuffer and optional plane index.
    /// This will misbehave if you have a multi-planar image and you don't specify a plane,
    /// or if you specify a plane index and it is not planar.
    ///
    /// - Parameters:
    ///   - pixelBuffer: The pixel buffer whose dimensions should be read.
    ///   - plane: The index of the plane to measure, or `nil` to measure the buffer as a whole.
    init(pixelBuffer: CVPixelBuffer, plane: Int?) {
        if let plane = plane {
            self.width = CVPixelBufferGetWidthOfPlane(pixelBuffer, plane)
            self.height = CVPixelBufferGetHeightOfPlane(pixelBuffer, plane)
            self.bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, plane)
        } else {
            self.width = CVPixelBufferGetWidth(pixelBuffer)
            self.height = CVPixelBufferGetHeight(pixelBuffer)
            self.bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
        }
    }

    /// Alternative initializer for directly setting dimension values.
    ///
    /// - Parameters:
    ///   - width: Width of the buffer, in pixels.
    ///   - height: Height of the buffer, in rows.
    ///   - bytesPerRow: Number of bytes occupied by each row.
    init(width: Int, height: Int, bytesPerRow: Int) {
        self.width = width
        self.height = height
        self.bytesPerRow = bytesPerRow
    }

    /// Create and return a `vImage_Buffer` using the dimensions and a supplied buffer.
    ///
    /// - Parameter data: Pointer to the pixel data the returned `vImage_Buffer` should describe.
    /// - Returns: A `vImage_Buffer` referencing `data` with this dimension's height, width and row bytes.
    func buffer(with data: UnsafeMutableRawPointer) -> vImage_Buffer {
        return vImage_Buffer(data: data, height: uHeight, width: uWidth, rowBytes: bytesPerRow)
    }
}
@Vangos
Copy link

Vangos commented Feb 20, 2021

Thank you very much for providing such a clear and elegant solution, @JoshuaSullivan.

For everyone asking how to use the code, here's an elementary example (for clarity, I've omitted the checks for nil/empty data):

public func session(_ session: ARSession, didUpdate frame: ARFrame) {

    // CapturedImageSampler is initialized with the ARFrame itself; it reads
    // `frame.capturedImage` internally. Skip this frame if the sampler cannot be created.
    guard let sampler = try? CapturedImageSampler(frame: frame) else { return }

    // Coordinates are scalars in the range 0.0..<1.0; the result is nil if they are out of range.
    let color = sampler.getColor(atX: 0, y: 0)
}

Also, consider reading Joshua's blog post for tips and warnings.

@yura-kinoo
Copy link

Hi, is there any way to convert kCVPixelFormatType_420YpCbCr8PlanarFullRange to bi-planar or RGB so that AVAssetWriter can use it, or could you explain how that would work?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment