// Created March 7, 2023 19:36
// Transcribing live audio speech to text in SwiftUI.
// Transcribing speech to text
// Captures and logs meeting transcripts.
// You’ll request access to device hardware like the microphone and
// integrate the Speech framework to transcribe live audio to text.
import AVFoundation
import Foundation
import Speech
import SwiftUI
/// A helper for transcribing speech to text using SFSpeechRecognizer and AVAudioEngine.
///
/// Observe `transcript` from SwiftUI to display the live transcription. Errors are
/// reported through the same property, wrapped in `<< >>`.
class SpeechRecognizer: ObservableObject {
    /// Failures that can occur while setting up or running speech recognition.
    enum RecognizerError: Error {
        case nilRecognizer
        case notAuthorizedToRecognize
        case notPermittedToRecord
        case recognizerIsUnavailable

        /// A short, human-readable description of the failure.
        var message: String {
            switch self {
            case .nilRecognizer: return "Can't initialize speech recognizer"
            case .notAuthorizedToRecognize: return "Not authorized to recognize speech"
            case .notPermittedToRecord: return "Not permitted to record audio"
            case .recognizerIsUnavailable: return "Recognizer is unavailable"
            }
        }
    }

    /// The most recent transcription, or an error message wrapped in `<< >>`.
    @Published var transcript: String = ""

    private var audioEngine: AVAudioEngine?
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?
    private let recognizer: SFSpeechRecognizer?

    /// Initializes a new speech recognizer. If this is the first time you've used the class, it
    /// requests access to the speech recognizer and the microphone.
    ///
    /// The permission checks run asynchronously; any failure is published through `transcript`.
    init() {
        recognizer = SFSpeechRecognizer()

        Task(priority: .background) {
            do {
                guard recognizer != nil else {
                    throw RecognizerError.nilRecognizer
                }
                guard await SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                    throw RecognizerError.notAuthorizedToRecognize
                }
                guard await AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                    throw RecognizerError.notPermittedToRecord
                }
            } catch {
                // Report the failure instead of silently swallowing it.
                speakError(error)
            }
        }
    }

    /*
     Earlier synchronous initializer, kept for reference only (disabled).

    init() {
        recognizer = SFSpeechRecognizer()
        do {
            guard recognizer != nil else {
                throw RecognizerError.nilRecognizer
            }
            guard SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                throw RecognizerError.notAuthorizedToRecognize
            }
            guard AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                throw RecognizerError.notPermittedToRecord
            }
        } catch {
            speakError(error)
        }
    }
    */

    deinit {
        reset()
    }

    /// Begin transcribing audio.
    ///
    /// Creates a `SFSpeechRecognitionTask` that transcribes speech to text until you call `stopTranscribing()`.
    /// The resulting transcription is continuously written to the published `transcript` property.
    func transcribe() {
        DispatchQueue(label: "Speech Recognizer Queue", qos: .background).async { [weak self] in
            guard let self = self, let recognizer = self.recognizer, recognizer.isAvailable else {
                // In this branch `self` is still the weak, optional capture.
                self?.speakError(RecognizerError.recognizerIsUnavailable)
                return
            }

            do {
                let (audioEngine, request) = try Self.prepareEngine()
                self.audioEngine = audioEngine
                self.request = request

                // Capture `self` weakly: `self` stores the task, and the task retains this
                // handler, so a strong capture would form a retain cycle.
                self.task = recognizer.recognitionTask(with: request) { [weak self] result, error in
                    let receivedFinalResult = result?.isFinal ?? false
                    let receivedError = error != nil

                    // Once recognition has finished or failed, stop feeding audio.
                    if receivedFinalResult || receivedError {
                        audioEngine.stop()
                        audioEngine.inputNode.removeTap(onBus: 0)
                    }

                    if let result = result {
                        self?.speak(result.bestTranscription.formattedString)
                    }
                }
            } catch {
                self.reset()
                self.speakError(error)
            }
        }
    }

    /// Stop transcribing audio.
    func stopTranscribing() {
        reset()
    }

    /// Reset the speech recognizer.
    ///
    /// Cancels any running recognition task, stops the audio engine, and releases both.
    func reset() {
        task?.cancel()
        audioEngine?.stop()
        audioEngine = nil
        request = nil
        task = nil
    }

    /// Builds a running audio engine whose microphone input is fed into a new recognition request.
    ///
    /// - Returns: The started engine and the request that receives its audio buffers.
    /// - Throws: Any error raised while configuring the audio session or starting the engine.
    private static func prepareEngine() throws -> (AVAudioEngine, SFSpeechAudioBufferRecognitionRequest) {
        let audioEngine = AVAudioEngine()

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            // Forward every captured buffer to the recognizer; without this nothing is transcribed.
            request.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()

        return (audioEngine, request)
    }

    /// Publishes `message` as the current transcript.
    ///
    /// Callers run on background queues and tasks, while `@Published` changes drive SwiftUI
    /// updates, so the assignment is hopped onto the main queue.
    private func speak(_ message: String) {
        DispatchQueue.main.async { [weak self] in
            self?.transcript = message
        }
    }

    /// Publishes a description of `error` as the transcript, wrapped in `<< >>`.
    private func speakError(_ error: Error) {
        let errorMessage = (error as? RecognizerError)?.message ?? error.localizedDescription
        speak("<< \(errorMessage) >>")
    }
}
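/*
 Earlier synchronous variants, kept for reference only (disabled). They return
 without waiting for the asynchronous permission callback, so the returned value
 is unreliable; use the `async` versions instead.

extension SFSpeechRecognizer {
    static func hasAuthorizationToRecognize() -> Bool {
        var isAuthorized = false
        requestAuthorization { status in
            isAuthorized = status == .authorized
        }
        return isAuthorized
    }
}

extension AVAudioSession {
    func hasPermissionToRecord() -> Bool {
        var isPermitted = false
        requestRecordPermission { authorized in
            isPermitted = authorized
        }
        return isPermitted
    }
}
*/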
extension SFSpeechRecognizer {
    /// Requests speech-recognition authorization and suspends until the user has answered.
    ///
    /// - Returns: `true` only when the resulting status is `.authorized`.
    static func hasAuthorizationToRecognize() async -> Bool {
        await withCheckedContinuation { continuation in
            requestAuthorization { status in
                continuation.resume(returning: status == .authorized)
            }
        }
    }
}
extension AVAudioSession {
    /// Requests microphone permission and suspends until the user has answered.
    ///
    /// - Returns: `true` when recording has been permitted.
    func hasPermissionToRecord() async -> Bool {
        await withCheckedContinuation { continuation in
            requestRecordPermission { authorized in
                continuation.resume(returning: authorized)
            }
        }
    }
}
// MARK: - Example: calling the code above for speech recognition

/// Minimal usage example: transcribes while the view is on screen and shows the live transcript.
struct SpeechRecognizerExampleView: View {
    @StateObject var speechRecognizer = SpeechRecognizer()

    var body: some View {
        ZStack {
            Text(speechRecognizer.transcript)
        }
        .onAppear {
            // Start transcribing speech; reset first so a previous session is discarded.
            speechRecognizer.reset()
            speechRecognizer.transcribe()
        }
        .onDisappear {
            // Stop transcribing.
            speechRecognizer.stopTranscribing()
        }
    }
}
