@hotpaw2
Last active April 3, 2024
Swift Audio Recording class. Reads buffers of input samples from the microphone using the iOS RemoteIO Audio Unit API
//
// RecordAudio.swift
//
// This is a Swift class (updated for Swift 5)
// that uses the iOS RemoteIO Audio Unit
// to record audio input samples
// (it should be instantiated as a singleton object).
//
// Created by Ronald Nicholson on 10/21/16.
// Copyright © 2017,2019 HotPaw Productions. All rights reserved.
// http://www.nicholson.com/rhn/
// Distribution permission: BSD 2-clause license
//
import Foundation
import AVFoundation
import AudioUnit

var gTmp0 = 0 // debug: holds the most recent OSStatus for inspection

// call startAudioSession() (or setupAudioSessionForRecording() in RecordAudio_v2)
// while the controlling view loads
// call startRecording() to start recording in a later UI call
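// A minimal usage sketch (not part of the original gist): a hypothetical UIKit
// view controller owning a single RecordAudio instance. The controller, outlet,
// and action names below are illustrative assumptions, not from this gist.
//
// import UIKit
//
// class MyRecordingViewController: UIViewController {
//     let recorder = RecordAudio() // instantiate once, singleton-style
//
//     override func viewDidLoad() {
//         super.viewDidLoad()
//         recorder.startAudioSession() // configure the AVAudioSession early
//     }
//
//     @IBAction func recordTapped(_ sender: Any) {
//         recorder.startRecording()    // begin pulling microphone samples
//     }
//
//     @IBAction func stopTapped(_ sender: Any) {
//         recorder.stopRecording()
//     }
// }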
final class RecordAudio: NSObject {

    var audioUnit: AudioUnit? = nil

    var micPermission = false
    var sessionActive = false
    var isRecording = false

    var sampleRate: Double = 44100.0 // default audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768) // for incoming samples
    var circInIdx: Int = 0
    var audioLevel: Float = 0.0

    private var hwSRate = 48000.0 // guess of device hardware sample rate
    private var micPermissionDispatchToken = 0
    private var interrupted = false // for restart from audio interruption notification

    func startRecording() {
        if isRecording { return }
        startAudioSession()
        if sessionActive {
            startAudioUnit()
        }
    }

    var numberOfChannels: Int = 2

    private let outputBus: UInt32 = 0
    private let inputBus: UInt32 = 1
    func startAudioUnit() {
        var err: OSStatus = noErr

        if self.audioUnit == nil {
            setupAudioUnit() // setup once
        }
        guard let au = self.audioUnit else { return }

        err = AudioUnitInitialize(au)
        gTmp0 = Int(err)
        if err != noErr { return }
        err = AudioOutputUnitStart(au) // start
        gTmp0 = Int(err)

        if err == noErr {
            isRecording = true
        }
    }
    func startAudioSession() {
        if (sessionActive == false) {
            // set and activate Audio Session
            do {
                let audioSession = AVAudioSession.sharedInstance()

                if (micPermission == false) {
                    if (micPermissionDispatchToken == 0) {
                        micPermissionDispatchToken = 1
                        audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                            if granted {
                                self.micPermission = true
                                return
                                // check for this flag and call from UI loop if needed
                            } else {
                                gTmp0 += 1
                                // dispatch in main/UI thread an alert
                                // informing that mic permission is not switched on
                            }
                        })
                    }
                }
                if micPermission == false { return }

                try audioSession.setCategory(AVAudioSession.Category.record)
                // choose 44100 or 48000 based on hardware rate
                // sampleRate = 44100.0
                var preferredIOBufferDuration = 0.0058 // 5.8 milliseconds = 256 samples at 44100 Hz
                hwSRate = audioSession.sampleRate // get native hardware rate
                if hwSRate == 48000.0 { sampleRate = 48000.0 } // set session to hardware rate
                if hwSRate == 48000.0 { preferredIOBufferDuration = 0.0053 } // 256 samples at 48000 Hz

                let desiredSampleRate = sampleRate
                try audioSession.setPreferredSampleRate(desiredSampleRate)
                try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

                NotificationCenter.default.addObserver(
                    forName: AVAudioSession.interruptionNotification,
                    object: nil,
                    queue: nil,
                    using: myAudioSessionInterruptionHandler)

                try audioSession.setActive(true)
                sessionActive = true
            } catch /* let error as NSError */ {
                // handle error here
            }
        }
    }
    private func setupAudioUnit() {

        var componentDesc: AudioComponentDescription
            = AudioComponentDescription(
                componentType:          OSType(kAudioUnitType_Output),
                componentSubType:       OSType(kAudioUnitSubType_RemoteIO),
                componentManufacturer:  OSType(kAudioUnitManufacturer_Apple),
                componentFlags:         UInt32(0),
                componentFlagsMask:     UInt32(0))

        var osErr: OSStatus = noErr

        let component: AudioComponent! = AudioComponentFindNext(nil, &componentDesc)

        var tempAudioUnit: AudioUnit?
        osErr = AudioComponentInstanceNew(component, &tempAudioUnit)
        self.audioUnit = tempAudioUnit

        guard let au = self.audioUnit else { return }

        // Enable I/O for input.
        var one_ui32: UInt32 = 1
        osErr = AudioUnitSetProperty(au,
                                     kAudioOutputUnitProperty_EnableIO,
                                     kAudioUnitScope_Input,
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))

        // Set format to 32-bit floats, linear PCM
        let nc = 2 // 2 channel stereo
        var streamFormatDesc: AudioStreamBasicDescription = AudioStreamBasicDescription(
            mSampleRate:        Double(sampleRate),
            mFormatID:          kAudioFormatLinearPCM,
            mFormatFlags:       (kAudioFormatFlagsNativeFloatPacked),
            mBytesPerPacket:    UInt32(nc * MemoryLayout<Float>.size),
            mFramesPerPacket:   1,
            mBytesPerFrame:     UInt32(nc * MemoryLayout<Float>.size),
            mChannelsPerFrame:  UInt32(nc),
            mBitsPerChannel:    UInt32(8 * (MemoryLayout<Float>.size)),
            mReserved:          UInt32(0))

        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Input,
                                     outputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        osErr = AudioUnitSetProperty(au,
                                     kAudioUnitProperty_StreamFormat,
                                     kAudioUnitScope_Output,
                                     inputBus,
                                     &streamFormatDesc,
                                     UInt32(MemoryLayout<AudioStreamBasicDescription>.size))

        var inputCallbackStruct
            = AURenderCallbackStruct(inputProc: recordingCallback,
                                     inputProcRefCon:
                                        UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()))

        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
                                     AudioUnitScope(kAudioUnitScope_Global),
                                     inputBus,
                                     &inputCallbackStruct,
                                     UInt32(MemoryLayout<AURenderCallbackStruct>.size))

        // Ask CoreAudio to allocate buffers for us on render
        // (reportedly the default, but set it explicitly here).
        osErr = AudioUnitSetProperty(au,
                                     AudioUnitPropertyID(kAudioUnitProperty_ShouldAllocateBuffer),
                                     AudioUnitScope(kAudioUnitScope_Output),
                                     inputBus,
                                     &one_ui32,
                                     UInt32(MemoryLayout<UInt32>.size))

        gTmp0 = Int(osErr)
    }
    let recordingCallback: AURenderCallback = { (
        inRefCon,
        ioActionFlags,
        inTimeStamp,
        inBusNumber,
        frameCount,
        ioData) -> OSStatus in

        let audioObject = unsafeBitCast(inRefCon, to: RecordAudio.self)
        var err: OSStatus = noErr

        // set mData to nil; AudioUnitRender() should be allocating buffers
        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: UInt32(2),
                mDataByteSize: 16,
                mData: nil))

        if let au = audioObject.audioUnit {
            err = AudioUnitRender(au,
                                  ioActionFlags,
                                  inTimeStamp,
                                  inBusNumber,
                                  frameCount,
                                  &bufferList)
        }

        audioObject.processMicrophoneBuffer(inputDataList: &bufferList,
                                            frameCount: UInt32(frameCount))
        return 0
    }
    func processMicrophoneBuffer( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        // Microphone Input Analysis
        // let data = UnsafePointer<Int16>(mBuffers.mData)
        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        if let bptr = bufferPointer {
            let dataArray = bptr.assumingMemoryBound(to: Float.self)
            var sum: Float = 0.0
            var j = self.circInIdx
            let m = self.circBuffSize
            // each of the frameCount stereo frames holds 2 interleaved samples
            for i in 0..<count {
                let x = Float(dataArray[i+i  ]) // copy left channel sample
                let y = Float(dataArray[i+i+1]) // copy right channel sample
                self.circBuffer[j    ] = x
                self.circBuffer[j + 1] = y
                j += 2; if j >= m { j = 0 } // into circular buffer
                sum += x * x + y * y
            }
            self.circInIdx = j // circular index will always be less than size
            // measuredMicVol_1 = sqrt( Float(sum) / Float(count) ) // scaled volume
            if sum > 0.0 && count > 0 {
                let tmp = 5.0 * (logf(sum / Float(count)) + 20.0)
                let r: Float = 0.2
                audioLevel = r * tmp + (1.0 - r) * audioLevel
            }
        }
    }
    func stopRecording() {
        if let au = self.audioUnit {
            AudioOutputUnitStop(au) // stop before uninitializing
            AudioUnitUninitialize(au)
        }
        isRecording = false
    }
    func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interruptionDict = notification.userInfo
        if let interruptionType = interruptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interruptionVal = AVAudioSession.InterruptionType(
                rawValue: (interruptionType as AnyObject).uintValue)
            if (interruptionVal == AVAudioSession.InterruptionType.began) {
                if (isRecording) {
                    stopRecording()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        sessionActive = false
                    } catch {
                    }
                    interrupted = true
                }
            } else if (interruptionVal == AVAudioSession.InterruptionType.ended) {
                if (interrupted) {
                    // potentially restart here
                }
            }
        }
    }
} // end of class RecordAudio
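// A consumer-side sketch (not part of the original gist): the class above only
// writes into circBuffer, so a reader must keep its own output index and poll
// from a non-audio thread. readSamples(into:fromIndex:) is an illustrative name.
extension RecordAudio {
    // Copy samples that arrived since outIdx into out; returns the number copied.
    func readSamples(into out: inout [Float], fromIndex outIdx: inout Int) -> Int {
        var n = 0
        while outIdx != circInIdx && n < out.count {
            out[n] = circBuffer[outIdx]
            outIdx += 1
            if outIdx >= circBuffSize { outIdx = 0 } // wrap the circular index
            n += 1
        }
        return n
    }
}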
final class RecordAudio_v2: NSObject {

    var auAudioUnit: AUAudioUnit! = nil

    var enableRecording = true
    var audioSessionActive = false
    var audioSetupComplete = false
    var isRecording = false

    var sampleRate: Double = 48000.0 // desired audio sample rate

    let circBuffSize = 32768 // lock-free circular fifo/buffer size
    var circBuffer = [Float](repeating: 0, count: 32768)
    var circInIdx: Int = 0  // sample input index
    var circOutIdx: Int = 0 // sample output index

    var audioLevel: Float = 0.0

    private var micPermissionRequested = false
    private var micPermissionGranted = false

    // for restart from audio interruption notification
    private var audioInterrupted = false

    private var renderBlock: AURenderBlock? = nil
    func startRecording() {
        if isRecording { return }

        if audioSessionActive == false {
            // configure and activate Audio Session; this might change the sampleRate
            setupAudioSessionForRecording()
        }
        guard micPermissionGranted && audioSessionActive else { return }

        let audioFormat = AVAudioFormat(
            commonFormat: AVAudioCommonFormat.pcmFormatInt16, // pcmFormatInt16 or pcmFormatFloat32
            sampleRate: Double(sampleRate),                   // 44100.0 or 48000.0
            channels: AVAudioChannelCount(2),                 // 1 or 2
            interleaved: true)                                // true for interleaved stereo

        if (auAudioUnit == nil) {
            setupRemoteIOAudioUnitForRecord(audioFormat: audioFormat!)
        }

        renderBlock = auAudioUnit.renderBlock // returns AURenderBlock

        if ( enableRecording
            && micPermissionGranted
            && audioSetupComplete
            && audioSessionActive
            && isRecording == false ) {

            auAudioUnit.isInputEnabled = true

            auAudioUnit.outputProvider = { // AURenderPullInputBlock
                (actionFlags, timestamp, frameCount, inputBusNumber, inputData) -> AUAudioUnitStatus in

                if let block = self.renderBlock { // AURenderBlock?
                    let err: OSStatus = block(actionFlags,
                                              timestamp,
                                              frameCount,
                                              1, // pull from bus 1, the microphone input
                                              inputData,
                                              .none)
                    if err == noErr {
                        // save samples from current input buffer to circular buffer
                        self.recordMicrophoneInputSamples(
                            inputDataList: inputData,
                            frameCount: UInt32(frameCount))
                    }
                }
                let err2: AUAudioUnitStatus = noErr
                return err2
            }

            do {
                circInIdx = 0 // initialize circular buffer pointers
                circOutIdx = 0
                try auAudioUnit.allocateRenderResources()
                try auAudioUnit.startHardware() // equivalent to AudioOutputUnitStart
                isRecording = true
            } catch {
                // placeholder for error handling
            }
        }
    }
    func stopRecording() {
        if (isRecording) {
            auAudioUnit.stopHardware()
            isRecording = false
        }
        if (audioSessionActive) {
            let audioSession = AVAudioSession.sharedInstance()
            do {
                try audioSession.setActive(false)
            } catch /* let error as NSError */ {
            }
            audioSessionActive = false
        }
    }
    private func recordMicrophoneInputSamples( // process RemoteIO Buffer from mic input
        inputDataList: UnsafeMutablePointer<AudioBufferList>,
        frameCount: UInt32)
    {
        let inputDataPtr = UnsafeMutableAudioBufferListPointer(inputDataList)
        let mBuffers: AudioBuffer = inputDataPtr[0]
        let count = Int(frameCount)

        let bufferPointer = UnsafeMutableRawPointer(mBuffers.mData)
        var j = self.circInIdx // current circular array input index
        let n = self.circBuffSize
        var audioLevelSum: Float = 0.0
        if let bptr = bufferPointer?.assumingMemoryBound(to: Int16.self) {
            // each of the frameCount stereo frames holds 2 interleaved samples
            for i in 0..<count {
                // Save samples in circular buffer for later processing
                let x = Float(bptr[i+i  ])
                let y = Float(bptr[i+i+1])
                self.circBuffer[j    ] = x // Stereo Left
                self.circBuffer[j + 1] = y // Stereo Right
                j += 2; if j >= n { j = 0 } // Circular buffer looping
                // Microphone Input Analysis
                audioLevelSum += x * x + y * y
            }
        }
        OSMemoryBarrier() // from libkern/OSAtomic.h
        self.circInIdx = j // circular index will always be less than size
        if audioLevelSum > 0.0 && count > 0 {
            audioLevel = logf(audioLevelSum / Float(count))
        }
    }
    // set up and activate Audio Session
    func setupAudioSessionForRecording() {
        do {
            let audioSession = AVAudioSession.sharedInstance()

            if (micPermissionGranted == false) {
                if (micPermissionRequested == false) {
                    micPermissionRequested = true
                    audioSession.requestRecordPermission({ (granted: Bool) -> Void in
                        if granted {
                            self.micPermissionGranted = true
                            self.startRecording()
                            return
                        } else {
                            self.enableRecording = false
                            // dispatch in main/UI thread an alert
                            // informing that mic permission is not switched on
                        }
                    })
                }
                return
            }

            if enableRecording {
                try audioSession.setCategory(AVAudioSession.Category.record)
            }
            let preferredIOBufferDuration = 0.0053 // 5.3 milliseconds = 256 samples at 48000 Hz
            try audioSession.setPreferredSampleRate(sampleRate) // at 48000.0
            try audioSession.setPreferredIOBufferDuration(preferredIOBufferDuration)

            NotificationCenter.default.addObserver(
                forName: AVAudioSession.interruptionNotification,
                object: nil,
                queue: nil,
                using: myAudioSessionInterruptionHandler)

            try audioSession.setActive(true)
            audioSessionActive = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }
    // find and set up the sample format for the RemoteIO Audio Unit
    private func setupRemoteIOAudioUnitForRecord(audioFormat: AVAudioFormat) {
        do {
            let audioComponentDescription = AudioComponentDescription(
                componentType:          kAudioUnitType_Output,
                componentSubType:       kAudioUnitSubType_RemoteIO,
                componentManufacturer:  kAudioUnitManufacturer_Apple,
                componentFlags:         0,
                componentFlagsMask:     0)

            try auAudioUnit = AUAudioUnit(componentDescription: audioComponentDescription)

            // bus 1 is for data that the microphone exports out to the handler block
            let bus1 = auAudioUnit.outputBusses[1]
            try bus1.setFormat(audioFormat) // for microphone bus
            audioSetupComplete = true
        } catch /* let error as NSError */ {
            // placeholder for error handling
        }
    }
    private func myAudioSessionInterruptionHandler(notification: Notification) -> Void {
        let interruptionDict = notification.userInfo
        if let interruptionType = interruptionDict?[AVAudioSessionInterruptionTypeKey] {
            let interruptionVal = AVAudioSession.InterruptionType(
                rawValue: (interruptionType as AnyObject).uintValue)
            if (interruptionVal == AVAudioSession.InterruptionType.began) {
                // [self beginInterruption];
                if (isRecording) {
                    auAudioUnit.stopHardware()
                    isRecording = false
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(false)
                        audioSessionActive = false
                    } catch {
                        // placeholder for error handling
                    }
                    audioInterrupted = true
                }
            } else if (interruptionVal == AVAudioSession.InterruptionType.ended) {
                // [self endInterruption];
                if (audioInterrupted) {
                    let audioSession = AVAudioSession.sharedInstance()
                    do {
                        try audioSession.setActive(true)
                        audioSessionActive = true
                        if (auAudioUnit.renderResourcesAllocated == false) {
                            try auAudioUnit.allocateRenderResources()
                        }
                        try auAudioUnit.startHardware()
                        isRecording = true
                    } catch {
                        // placeholder for error handling
                    }
                }
            }
        }
    }
} // end of class RecordAudio_v2
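// A hedged sketch (not part of the original gist): draining RecordAudio_v2's
// circular buffer into raw 16-bit little-endian PCM Data, e.g. for streaming
// or WAV packaging. drainToPCMData is an illustrative helper name.
func drainToPCMData(_ recorder: RecordAudio_v2) -> Data {
    var bytes = Data()
    var i = recorder.circOutIdx
    while i != recorder.circInIdx {
        // circBuffer holds Float copies of the Int16 input samples; clamp and convert back
        var s = Int16(max(-32768.0, min(32767.0, recorder.circBuffer[i])))
        withUnsafeBytes(of: &s) { bytes.append(contentsOf: $0) } // iOS is little-endian
        i += 1
        if i >= recorder.circBuffSize { i = 0 } // wrap the circular index
    }
    recorder.circOutIdx = i // remember the read position
    return bytes
}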
// eof
@manishlnmcbm commented Jul 29, 2019:

OK, you are right. Checking this indeed shows that the iPhone hardware in my case sets the actual sampling rate to 16000 if I ask it for 8000. Well, that's according to the docs, as we are just setting the preferred values.
However, we can still rely on CoreAudio to deliver us a resampled frame stream. Even though the sampling rate of the AudioSession (i.e. the hardware sampling rate) is 16000, we can ask for the audio format on the input bus to be sampled at 8000.

(lldb) po audioSession.ioBufferDuration
0.00800000037997961

(lldb) po audioSession.sampleRate
16000.0

(lldb) po audioFormat.sampleRate
8000.0

Above you can see that the system has not accepted my requested 0.02 ioBufferDuration, and also has set the sampling rate to 16k instead of 8. But the audio format I'm setting on the AudioUnit has 8k rate, and here is my callback being called:

(lldb) po frameCount
64

Let's do the math. 16000 * 0.008 = 128. This is the expected number of frames given the system buffer duration and the 16k sample rate. However, we are receiving exactly half of that, which means that CoreAudio has resampled the input for us.
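A minimal sketch of that bus-format override, patterned on the gist's setupRemoteIOAudioUnitForRecord (configureMicBusFormat is an illustrative name; the 8000.0 is the codec's rate, not the hardware rate, and error handling is left to the caller):

func configureMicBusFormat(_ auAudioUnit: AUAudioUnit) throws {
    // ask bus 1 (the mic data bus) for 8 kHz even though the session runs at 16 kHz;
    // CoreAudio then resamples, which is why frameCount above is 64 rather than 128
    let fmt = AVAudioFormat(commonFormat: .pcmFormatInt16,
                            sampleRate: 8000.0,
                            channels: 1,
                            interleaved: true)!
    try auAudioUnit.outputBusses[1].setFormat(fmt)
}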

Good. Now we only have to collect 160 samples and push them downstream.

First of all, we will need our buffer. If you say that your codec doesn't have an input buffer then we'll have to use our circular buffer.

Secondly, you could implement parallel timer logic which fires exactly once every 20 ms and consumes the data from our buffer. That can't be "wrong" as long as it works, and there may be cases when such a solution is preferred (for example, if your encoding takes too much time).
But I say you might not need that. Every time you receive a callback with an additional 64 frames, you first push them into the buffer and then check whether your buffer now holds 160 or more frames. If yes, then immediately call downstream, synchronously, consuming just 160 frames and leaving the extra frames in the buffer for the next cycle (see the sketch below).
If you are concerned about calling downstream synchronously, you can always do that with an async block scheduled on another queue. Just make sure that your circular buffer is thread-safe in that case. Technically I'd say, give the synchronous solution a go first.
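Here is a minimal Swift sketch of that accumulate-and-drain pattern (packetSize, pending, onMicSamples, and encodeAMR are illustrative names; encodeAMR stands in for whatever your codec's entry point is):

let packetSize = 160          // 20 ms at 8 kHz, mono
var pending = [Int16]()       // simple accumulation buffer

func encodeAMR(_ packet: [Int16]) {
    // placeholder for the real codec call
}

func onMicSamples(_ samples: [Int16]) {
    pending.append(contentsOf: samples)   // push the new 64 frames
    while pending.count >= packetSize {   // drain every complete packet
        let packet = Array(pending.prefix(packetSize))
        pending.removeFirst(packetSize)
        encodeAMR(packet)                 // synchronous downstream call
    }
}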

Why is circBuffer a Float type in your code? If I try changing circBuffer from Float to Int32, it crashes at line number 443 (of this fork: https://gist.github.com/leonid-s-usov/dcd674b0a8baf96123cac6c4e08e3e0c ) at sum += Float32(x*x). The AMR codec internally converts to a char type and then to a short array of size 160. What type do we have to send from circBuffer? If I select 160 samples, the total is 640 bytes, but the codec converts 320 bytes at a time and appends the data to an NSMutableData with [out appendBytes:amrFrame length:byte_counter]; after iterating, it returns the total AMR data.

// See this snippet
NSData *EncodePCMToAMR(char *data, int maxLen, int nChannels, int nBitsPerSample) {
    char *oldBuf = data;

    /* input speech vector */
    short speech[160];

    /* counters */
    int byte_counter, frames = 0, bytes = 0;

}

One more question: how do I convert this circBuffer array to PCM data, since my codec supports PCM to AMR?

@leonid-s-usov commented:

@manishlnmcbm with all respect, I would like to limit this thread here, as it looks like it is departing too far from the topic of this code gist.

First of all, I am not taking any credit for the code; it's not mine. I used it as boilerplate, changed some parts, and fixed some minor issues, which I have published via my account.

Regarding your questions, I believe I've given enough information about using the new version 3 AudioUnit API to record real-time audio and feed it to downstream coders, including the suggestion to change the type of the circular buffer to address your requirements.

If you encounter problems refactoring the working code from Float32 to another type, then I suggest asking for help with more basic programming concepts on Stack Overflow or another educational platform.

Best regards.

@mahdidham commented:

Hi, I am looking for some Swift code to record audio and stream it to a server. However, I am still new to this. Can this code actually turn the recorded audio buffers into binary PCM/WAV data to send to a server? If so, how? Would you explain it? That would help. Thanks.

@dfeinzimer commented:

I'm encountering the error IPCAUClient: can't connect to server (-66748) upon calling startRecording() and haven't yet found any solutions.

It looks like this error is triggered within setupRemoteIOAudioUnitForRecord(audioFormat : AVAudioFormat) from

let bus1 = auAudioUnit.outputBusses[1]

Any suggestions?

The outputProvider block isn't getting called and I have a hunch it's related to this.

@billyPersistent commented:

There are periodic gaps in the recorded audio, one for every AudioUnit render. I don't think it's a problem with the indexing in processMicrophoneBuffer, and inTimeStamp increments perfectly with respect to frameCount. The size of the gaps is related to frames per slice (set indirectly through preferredIOBufferDuration). Is this a latency issue? Any insight would be appreciated.

@skyend commented Jan 9, 2023:

Thank you very much! 🙏🙏🙏🙏
