Exporting videos with AVFoundation in the strict concurrency world of Swift 6

//
// Created by Sami Samhuri on 2024-06-26.
//

import Foundation

/// Wraps a mutable value behind a lock so it can cross isolation domains.
/// `@unchecked Sendable` is sound here because every access goes through the lock.
final class SendableWrapper<T>: @unchecked Sendable {
    private var unsafeValue: T
    private let lock = NSLock()

    var value: T {
        get {
            lock.withLock { unsafeValue }
        }
        set {
            lock.withLock { unsafeValue = newValue }
        }
    }

    init(_ value: T) {
        unsafeValue = value
    }
}
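
// Usage sketch (illustrative only, not part of the original gist): a detached
// task and its caller share one flag through SendableWrapper. Each `value`
// access is individually atomic, but compound operations like `wrapper.value += 1`
// take the lock twice, so read-modify-write sequences can still lose updates.
func demoSendableWrapper() async {
    let cancelled = SendableWrapper(false)
    let work = Task.detached {
        while !cancelled.value { // safe to read from any isolation domain
            await Task.yield()
        }
    }
    cancelled.value = true // safe to write from any isolation domain
    await work.value
}
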
//
// Created by Sami Samhuri on 2024-06-26.
// Based on SDAVAssetExportSession.
//
import Foundation
import AVFoundation

final class VFAExportSession {
    /// The video composition to apply while exporting, if any.
    var videoComposition: AVVideoComposition? {
        get { sendableBag.videoComposition }
        set { sendableBag.videoComposition = newValue }
    }

    /// The audio mix to apply while exporting, if any.
    var audioMix: AVAudioMix? {
        get { sendableBag.audioMix }
        set { sendableBag.audioMix = newValue }
    }

    /// Audio output configuration dictionary, using keys defined in `<AVFoundation/AVAudioSettings.h>`,
    /// e.g. `AVFormatIDKey`, `AVNumberOfChannelsKey`, and `AVSampleRateKey`.
    var audioOutputConfiguration: [String: Any] {
        get { sendableBag.audioOutputConfiguration }
        set { sendableBag.audioOutputConfiguration = newValue }
    }

    /// Video output configuration dictionary, using keys defined in `<AVFoundation/AVVideoSettings.h>`,
    /// e.g. `AVVideoCodecKey`, `AVVideoWidthKey`, and `AVVideoHeightKey`.
    var videoOutputConfiguration: [String: Any] {
        get { sendableBag.videoOutputConfiguration }
        set { sendableBag.videoOutputConfiguration = newValue }
    }

    /// The time range to export. Defaults to the full asset, from `.zero` to `.positiveInfinity`.
    var timeRange = CMTimeRange(start: .zero, end: .positiveInfinity)

    /// Indicates whether the export should be optimized for network use.
    var optimizeForNetworkUse: Bool = false

    private let sendableBag: SendableBag

    /// Initializes a session with an asset to export.
    ///
    /// - Parameter asset: The asset to export.
    init(asset: sending AVAsset) {
        self.sendableBag = SendableBag(asset: asset)
    }

    func export(
        to outputURL: URL,
        as fileType: AVFileType
    ) async throws {
        let (asset, audioMix, audioOutputConfiguration, videoComposition, videoOutputConfiguration) = try sendableBag.consume()
        let duration = try await asset.load(.duration)
        // sendTracks(withMediaType:) is a track-loading helper that returns the
        // tracks as sending values; its definition isn't shown in this section.
        let videoTracks = try await asset.sendTracks(withMediaType: .video)
        let audioTracks = try await asset.sendTracks(withMediaType: .audio)
        let encoder = try await VFASampleEncoder(
            asset: asset,
            assetDuration: duration,
            audioTracks: audioTracks,
            audioMix: audioMix,
            audioOutputConfiguration: audioOutputConfiguration,
            videoTracks: videoTracks,
            videoComposition: videoComposition,
            videoOutputConfiguration: videoOutputConfiguration,
            outputURL: outputURL,
            fileType: fileType,
            // Forward the session's settings, which were previously never passed along.
            timeRange: timeRange,
            optimizeForNetworkUse: optimizeForNetworkUse
        ) { progress in
            #warning("FIXME: do something with progress")
        }
        do {
            try await encoder.encode()
        } catch {
            // Clean up the partial file on failure.
            try? FileManager.default.removeItem(at: outputURL)
            throw error
        }
    }

    /// A lock-guarded bag that holds the non-Sendable configuration until export
    /// begins, at which point `consume()` hands everything over exactly once.
    private class SendableBag: @unchecked Sendable {
        private let lock = NSLock()

        private var unsafeAsset: AVAsset?

        private var unsafeAudioMix: AVAudioMix?
        var audioMix: AVAudioMix? {
            get { lock.withLock { unsafeAudioMix } }
            set { lock.withLock { unsafeAudioMix = newValue } }
        }

        private var unsafeAudioOutputConfiguration: [String: Any] = [:]
        var audioOutputConfiguration: [String: Any] {
            get { lock.withLock { unsafeAudioOutputConfiguration } }
            set { lock.withLock { unsafeAudioOutputConfiguration = newValue } }
        }

        private var unsafeVideoComposition: AVVideoComposition?
        var videoComposition: AVVideoComposition? {
            get { lock.withLock { unsafeVideoComposition } }
            set { lock.withLock { unsafeVideoComposition = newValue } }
        }

        private var unsafeVideoOutputConfiguration: [String: Any] = [:]
        var videoOutputConfiguration: [String: Any] {
            get { lock.withLock { unsafeVideoOutputConfiguration } }
            set { lock.withLock { unsafeVideoOutputConfiguration = newValue } }
        }

        init(asset: sending AVAsset) {
            self.unsafeAsset = asset
        }

        func consume() throws -> sending (AVAsset, AVAudioMix?, [String: Any], AVVideoComposition?, [String: Any]) {
            try lock.withLock {
                guard let asset = unsafeAsset else { throw CancellationError() }
                // Clear out every reference after building the return value so the bag
                // no longer retains what it's sending away. Consuming twice throws.
                defer {
                    self.unsafeAsset = nil
                    self.unsafeAudioMix = nil
                    self.unsafeAudioOutputConfiguration = [:]
                    self.unsafeVideoComposition = nil
                    self.unsafeVideoOutputConfiguration = [:]
                }
                return (asset, unsafeAudioMix, unsafeAudioOutputConfiguration, unsafeVideoComposition, unsafeVideoOutputConfiguration)
            }
        }
    }
}
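
// Usage sketch (illustrative only; the codec, dimensions, and audio settings
// below are assumptions, not values from the gist):
func exportMovie(from sourceURL: URL, to destinationURL: URL) async throws {
    let session = VFAExportSession(asset: AVURLAsset(url: sourceURL))
    session.videoOutputConfiguration = [
        AVVideoCodecKey: AVVideoCodecType.h264,
        AVVideoWidthKey: 1280,
        AVVideoHeightKey: 720,
    ]
    session.audioOutputConfiguration = [
        AVFormatIDKey: kAudioFormatMPEG4AAC,
        AVNumberOfChannelsKey: 2,
        AVSampleRateKey: 44_100,
    ]
    session.optimizeForNetworkUse = true
    try await session.export(to: destinationURL, as: .mp4)
}
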
//
// Created by Sami Samhuri on 2024-06-26.
// Based on SDAVAssetExportSession.
//
import AVFoundation

actor VFASampleEncoder {
    enum Error: LocalizedError, CustomStringConvertible {
        case setupFailure
        case readFailure((any Swift.Error)?)
        case writeFailure((any Swift.Error)?)

        public var description: String {
            switch self {
            case .setupFailure:
                return "technical mumbo jumbo"
            case .readFailure:
                return "what did you feed me"
            case .writeFailure:
                return "we're going nowhere"
            }
        }

        public var errorDescription: String? {
            description
        }
    }

    /// The actor's executor is this serial queue. Media data callbacks are requested
    /// on the same queue (see encode()), so they already run on the actor's executor
    /// and encodeReadySamples(type:) can safely assume isolation without a Task hop.
    let queue = DispatchSerialQueue(label: "VFASampleEncoder", autoreleaseFrequency: .workItem, target: .global())

    nonisolated var unownedExecutor: UnownedSerialExecutor {
        queue.asUnownedSerialExecutor()
    }

    let reader: AVAssetReader
    let videoOutput: AVAssetReaderOutput
    let audioOutput: AVAssetReaderOutput?
    let writer: AVAssetWriter
    let videoInput: AVAssetWriterInput
    let audioInput: AVAssetWriterInput?
    let pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor
    let timeRange: CMTimeRange
    let duration: TimeInterval
    let updateProgress: @MainActor @Sendable (Float) -> Void

    private var progress: Float = 0.0
    private var lastSamplePresentationTime: CMTime = .zero

    init(
        reader: AVAssetReader,
        audioOutput: AVAssetReaderOutput?,
        videoOutput: AVAssetReaderOutput,
        writer: AVAssetWriter,
        audioInput: AVAssetWriterInput?,
        videoInput: AVAssetWriterInput,
        pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor,
        timeRange: CMTimeRange,
        duration: TimeInterval,
        updateProgress: @escaping @MainActor @Sendable (Float) -> Void
    ) {
        self.reader = reader
        self.audioOutput = audioOutput
        self.videoOutput = videoOutput
        self.writer = writer
        self.audioInput = audioInput
        self.videoInput = videoInput
        self.pixelBufferAdaptor = pixelBufferAdaptor
        self.timeRange = timeRange
        self.duration = duration
        self.updateProgress = updateProgress
    }

    init(
        asset: AVAsset,
        assetDuration: CMTime,
        audioTracks: [AVAssetTrack],
        audioMix: AVAudioMix?,
        audioOutputConfiguration: [String: Any],
        videoTracks: [AVAssetTrack],
        videoComposition: AVVideoComposition?,
        videoOutputConfiguration: [String: Any],
        outputURL: URL,
        fileType: AVFileType,
        timeRange: CMTimeRange = CMTimeRange(start: .zero, duration: .positiveInfinity),
        optimizeForNetworkUse: Bool = true,
        updateProgress: @escaping @MainActor @Sendable (Float) -> Void
    ) async throws {
        precondition(!videoTracks.isEmpty)
        guard let width = videoComposition.map({ Int($0.renderSize.width) })
            ?? (videoOutputConfiguration[AVVideoWidthKey] as? NSNumber)?.intValue,
            let height = videoComposition.map({ Int($0.renderSize.height) })
            ?? (videoOutputConfiguration[AVVideoHeightKey] as? NSNumber)?.intValue
        else {
            NSLog("Export dimensions must be provided in a video composition or video output configuration")
            throw Error.setupFailure
        }
        let duration =
            if timeRange.duration.isValid && !timeRange.duration.isPositiveInfinity {
                timeRange.duration.seconds
            } else {
                assetDuration.seconds
            }
        let reader = try Self.setUpReader(asset: asset, timeRange: timeRange)
        let writer = try Self.setUpWriter(outputURL: outputURL, fileType: fileType, optimizeForNetworkUse: optimizeForNetworkUse)
        guard writer.canApply(outputSettings: videoOutputConfiguration, forMediaType: .video) else {
            throw Error.setupFailure
        }
        let videoOutput = AVAssetReaderVideoCompositionOutput(videoTracks: videoTracks, videoSettings: nil)
        videoOutput.alwaysCopiesSampleData = false
        videoOutput.videoComposition = videoComposition
        // Fail loudly instead of silently continuing with a reader/writer that's missing tracks.
        guard reader.canAdd(videoOutput) else { throw Error.setupFailure }
        reader.add(videoOutput)
        let videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoOutputConfiguration)
        guard writer.canAdd(videoInput) else { throw Error.setupFailure }
        writer.add(videoInput)
        let pixelBufferAttributes: [String: Any] = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32RGBA),
            kCVPixelBufferWidthKey as String: NSNumber(value: width),
            kCVPixelBufferHeightKey as String: NSNumber(value: height),
            // Legacy IOSurface/OpenGL ES compatibility keys carried over from SDAVAssetExportSession.
            "IOSurfaceOpenGLESTextureCompatibility": NSNumber(value: true),
            "IOSurfaceOpenGLESFBOCompatibility": NSNumber(value: true),
        ]
        let pixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoInput, sourcePixelBufferAttributes: pixelBufferAttributes)
        let audioOutput: AVAssetReaderOutput?
        let audioInput: AVAssetWriterInput?
        if audioTracks.isEmpty {
            audioOutput = nil
            audioInput = nil
        } else {
            let output = AVAssetReaderAudioMixOutput(audioTracks: audioTracks, audioSettings: nil)
            // Apply the caller's audio mix; without this the parameter was silently ignored.
            output.audioMix = audioMix
            guard reader.canAdd(output) else { throw Error.setupFailure }
            reader.add(output)
            let input = AVAssetWriterInput(mediaType: .audio, outputSettings: audioOutputConfiguration)
            guard writer.canAdd(input) else { throw Error.setupFailure }
            writer.add(input)
            audioOutput = output
            audioInput = input
        }
        self.init(
            reader: reader,
            audioOutput: audioOutput,
            videoOutput: videoOutput,
            writer: writer,
            audioInput: audioInput,
            videoInput: videoInput,
            pixelBufferAdaptor: pixelBufferAdaptor,
            timeRange: timeRange,
            duration: duration,
            updateProgress: updateProgress
        )
    }

    private static func setUpReader(asset: AVAsset, timeRange: CMTimeRange) throws -> AVAssetReader {
        let reader = try AVAssetReader(asset: asset)
        reader.timeRange = timeRange
        return reader
    }

    private static func setUpWriter(
        outputURL: URL,
        fileType: AVFileType,
        optimizeForNetworkUse: Bool
    ) throws -> AVAssetWriter {
        let writer = try AVAssetWriter(outputURL: outputURL, fileType: fileType)
        writer.shouldOptimizeForNetworkUse = optimizeForNetworkUse
        return writer
    }

    func encode() async throws {
        // Surface start-up failures immediately rather than leaving the continuation hanging.
        guard writer.startWriting() else {
            throw Error.writeFailure(writer.error)
        }
        guard reader.startReading() else {
            throw Error.readFailure(reader.error)
        }
        writer.startSession(atSourceTime: timeRange.start)
        return try await withCheckedThrowingContinuation { continuation in
            let audioFinished = SendableWrapper(false)
            let videoFinished = SendableWrapper(false)

            // Resume the continuation only after both tracks have drained. Both
            // callbacks run on the same serial queue, so this can't double-fire.
            @Sendable func checkFinished() {
                guard audioFinished.value, videoFinished.value else { return }
                Task {
                    await self.complete { result in
                        continuation.resume(with: result)
                    }
                }
            }

            if let audioInput {
                audioInput.requestMediaDataWhenReady(on: queue) {
                    let hasMoreSamples = self.encodeReadySamples(type: .audio)
                    if !hasMoreSamples {
                        audioFinished.value = true
                        checkFinished()
                    }
                }
            } else {
                audioFinished.value = true
            }
            videoInput.requestMediaDataWhenReady(on: queue) {
                let hasMoreSamples = self.encodeReadySamples(type: .video)
                if !hasMoreSamples {
                    videoFinished.value = true
                    checkFinished()
                }
            }
        }
    }

    private func complete(completion: @escaping @Sendable (Result<Void, Swift.Error>) -> Void) {
        if reader.status == .cancelled || writer.status == .cancelled {
            completion(.failure(CancellationError()))
        } else if writer.status == .failed {
            reader.cancelReading()
            completion(.failure(Error.writeFailure(writer.error)))
        } else if reader.status == .failed {
            writer.cancelWriting()
            completion(.failure(Error.readFailure(reader.error)))
        } else {
            writer.finishWriting {
                completion(.success(()))
            }
        }
    }

    /// Called from requestMediaDataWhenReady(on:) callbacks. Those callbacks run on
    /// `queue`, which is this actor's serial executor, so assuming isolation here is
    /// sound and avoids an async hop for every batch of samples.
    nonisolated private func encodeReadySamples(type: AVMediaType) -> Bool {
        assumeIsolated { _self in
            _self.actuallyEncodeReadySamples(type: type)
        }
    }

    private func actuallyEncodeReadySamples(type: AVMediaType) -> Bool {
        let output: AVAssetReaderOutput?
        let input: AVAssetWriterInput?
        switch type {
        case .audio:
            output = audioOutput
            input = audioInput
        case .video:
            output = videoOutput
            input = videoInput
        default:
            assertionFailure("Unsupported media type \(type)")
            return false
        }
        guard let output, let input else {
            assertionFailure("No output or input for media type \(type)")
            return false
        }
        while input.isReadyForMoreMediaData {
            guard reader.status == .reading && writer.status == .writing,
                  let sampleBuffer = output.copyNextSampleBuffer()
            else {
                input.markAsFinished()
                NSLog("Finished encoding \(type) samples")
                return false
            }
            lastSamplePresentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) - timeRange.start
            let progress = Float(lastSamplePresentationTime.seconds / duration)
            // updateProgress is @MainActor, so reclaim main-actor isolation explicitly;
            // a bare call from this non-isolated closure won't compile under Swift 6.
            DispatchQueue.main.async {
                MainActor.assumeIsolated { self.updateProgress(progress) }
            }
            switch type {
            case .audio:
                guard input.append(sampleBuffer) else {
                    NSLog("Failed to append audio sample buffer \(sampleBuffer) to input \(input)")
                    return false
                }
            case .video:
                guard let pixelBufferPool = pixelBufferAdaptor.pixelBufferPool else {
                    NSLog("No pixel buffer pool available on adaptor \(pixelBufferAdaptor)")
                    return false
                }
                var toRenderBuffer: CVPixelBuffer?
                let result = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pixelBufferPool, &toRenderBuffer)
                var handled = false
                if result == kCVReturnSuccess, let toBuffer = toRenderBuffer {
                    // Note: this is where SDAVAssetExportSession's delegate rendered the frame
                    // into the pool buffer. Nothing draws into toBuffer here, so this path
                    // appends an empty buffer; the fallback below appends the original sample.
                    handled = pixelBufferAdaptor.append(toBuffer, withPresentationTime: lastSamplePresentationTime)
                    if !handled { return false }
                }
                if !handled {
                    guard input.append(sampleBuffer) else {
                        NSLog("Failed to append video sample buffer \(sampleBuffer) to input \(input)")
                        return false
                    }
                }
            default:
                assertionFailure("Unsupported media type: \(type)")
                return false
            }
        }
        // The input can't accept more data right now; return true so we're called
        // again when it's ready for more.
        NSLog("Completed encoding ready \(type) samples, more to come...")
        return true
    }
}
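
// Minimal sketch (illustrative only, not part of the original gist) of the
// queue-backed actor pattern used above: giving an actor a DispatchSerialQueue
// as its executor lets callback-based APIs target that same queue, after which
// assumeIsolated reclaims actor isolation synchronously instead of hopping
// through a Task.
actor QueueBackedDemo {
    let queue = DispatchSerialQueue(label: "QueueBackedDemo")

    nonisolated var unownedExecutor: UnownedSerialExecutor {
        queue.asUnownedSerialExecutor()
    }

    private var ticks = 0

    func scheduleTick() {
        // Hand our executor's queue to a callback-based API...
        queue.asyncAfter(deadline: .now() + 1) {
            // ...so the callback already runs on the actor's executor and the
            // synchronous mutation below is sound.
            self.assumeIsolated { isolatedSelf in
                isolatedSelf.ticks += 1
            }
        }
    }
}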