Transcribing Live Audio Speech to text in SwiftUI.
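Before wiring this up, note that both permission requests below require usage-description strings in the app's Info.plist; without them the permission APIs terminate the app at runtime. The prompt text here is a placeholder, adapt it to your app:

    <key>NSSpeechRecognitionUsageDescription</key>
    <string>The app transcribes your speech to text during meetings.</string>
    <key>NSMicrophoneUsageDescription</key>
    <string>The app records audio so it can transcribe your speech.</string>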
/*
 Transcribing speech to text.
 Captures and logs meeting transcripts. You'll request access to device
 hardware like the microphone and integrate the Speech framework to
 transcribe live audio to text.
*/

import AVFoundation
import Foundation
import Speech
import SwiftUI
/// A helper for transcribing speech to text using SFSpeechRecognizer and AVAudioEngine.
class SpeechRecognizer: ObservableObject {
    enum RecognizerError: Error {
        case nilRecognizer
        case notAuthorizedToRecognize
        case notPermittedToRecord
        case recognizerIsUnavailable

        var message: String {
            switch self {
            case .nilRecognizer: return "Can't initialize speech recognizer"
            case .notAuthorizedToRecognize: return "Not authorized to recognize speech"
            case .notPermittedToRecord: return "Not permitted to record audio"
            case .recognizerIsUnavailable: return "Recognizer is unavailable"
            }
        }
    }

    @Published var transcript: String = ""

    private var audioEngine: AVAudioEngine?
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?
    private let recognizer: SFSpeechRecognizer?
    /**
     Initializes a new speech recognizer. If this is the first time you've used the class, it
     requests access to the speech recognizer and the microphone.
     */
    init() {
        recognizer = SFSpeechRecognizer()
        Task(priority: .background) {
            do {
                guard recognizer != nil else {
                    throw RecognizerError.nilRecognizer
                }
                guard await SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                    throw RecognizerError.notAuthorizedToRecognize
                }
                guard await AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                    throw RecognizerError.notPermittedToRecord
                }
            } catch {
                speakError(error)
            }
        }
    }
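SFSpeechRecognizer() defaults to the device's current locale. If you want transcription pinned to one language instead, a minimal variation of the assignment above is the locale-specific initializer (the "en-US" identifier is just an example); it returns nil for unsupported locales, which the nilRecognizer guard already covers:

    recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))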
    /*
     Earlier synchronous version, kept for reference. It depended on the
     synchronous permission helpers commented out below, which can return
     their flags before the permission callbacks run, so the guards read
     stale values. The async init above replaces it.

    init() {
        recognizer = SFSpeechRecognizer()
        do {
            guard recognizer != nil else {
                throw RecognizerError.nilRecognizer
            }
            guard SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                throw RecognizerError.notAuthorizedToRecognize
            }
            guard AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                throw RecognizerError.notPermittedToRecord
            }
        } catch {
            speakError(error)
        }
    }
    */
    deinit {
        reset()
    }
    /**
     Begin transcribing audio.

     Creates an `SFSpeechRecognitionTask` that transcribes speech to text until you call `stopTranscribing()`.
     The resulting transcription is continuously written to the published `transcript` property.
     */
    func transcribe() {
        DispatchQueue(label: "Speech Recognizer Queue", qos: .background).async { [weak self] in
            guard let self = self, let recognizer = self.recognizer, recognizer.isAvailable else {
                self?.speakError(RecognizerError.recognizerIsUnavailable)
                return
            }
            do {
                let (audioEngine, request) = try Self.prepareEngine()
                self.audioEngine = audioEngine
                self.request = request
                self.task = recognizer.recognitionTask(with: request) { result, error in
                    let receivedFinalResult = result?.isFinal ?? false
                    let receivedError = error != nil // a non-nil error means recognition failed
                    if receivedFinalResult || receivedError {
                        audioEngine.stop()
                        audioEngine.inputNode.removeTap(onBus: 0)
                    }
                    if let result = result {
                        self.speak(result.bestTranscription.formattedString)
                    }
                }
            } catch {
                self.reset()
                self.speakError(error)
            }
        }
    }
    /// Stop transcribing audio.
    func stopTranscribing() {
        reset()
    }

    /// Reset the speech recognizer.
    func reset() {
        task?.cancel()
        audioEngine?.stop()
        audioEngine = nil
        request = nil
        task = nil
    }
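reset() cancels the in-flight task outright. If you would rather let the recognizer deliver a final transcript when you stop, one possible variation is to signal end-of-audio first; a sketch (finishTranscribing is a hypothetical name, not part of the gist):

    func finishTranscribing() {
        request?.endAudio()    // no more audio is coming; let the task post a final result
        audioEngine?.stop()
        audioEngine?.inputNode.removeTap(onBus: 0)
        audioEngine = nil
        request = nil
        task = nil
    }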
    private static func prepareEngine() throws -> (AVAudioEngine, SFSpeechAudioBufferRecognitionRequest) {
        let audioEngine = AVAudioEngine()

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        // Configure the shared audio session for recording.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Tap the microphone input and stream its buffers into the recognition request.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) {
            (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            request.append(buffer)
        }
        audioEngine.prepare()
        try audioEngine.start()

        return (audioEngine, request)
    }
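By default the request may send audio to Apple's servers. If offline use or privacy matters, you can opt in to on-device recognition where the recognizer supports it; a sketch of what you might add right after creating the request (assumption: iOS 13+ and a supported locale):

    if SFSpeechRecognizer()?.supportsOnDeviceRecognition == true {
        request.requiresOnDeviceRecognition = true
    }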
    private func speak(_ message: String) {
        // transcript is @Published and drives SwiftUI, so publish it on the
        // main thread (the recognition callback arrives on a background queue).
        DispatchQueue.main.async {
            self.transcript = message
        }
    }

    private func speakError(_ error: Error) {
        var errorMessage = ""
        if let error = error as? RecognizerError {
            errorMessage += error.message
        } else {
            errorMessage += error.localizedDescription
        }
        DispatchQueue.main.async {
            self.transcript = "<< \(errorMessage) >>"
        }
    }
}
/*
 Synchronous permission helpers, kept for reference: requestAuthorization and
 requestRecordPermission invoke their completion handlers asynchronously, so
 these functions can return their flags before the handlers set them. The
 async versions below wrap the callbacks in checked continuations instead.

extension SFSpeechRecognizer {
    static func hasAuthorizationToRecognize() -> Bool {
        var isAuthorized = false
        requestAuthorization { status in
            isAuthorized = status == .authorized
        }
        return isAuthorized
    }
}

extension AVAudioSession {
    func hasPermissionToRecord() -> Bool {
        var isPermitted = false
        requestRecordPermission { authorized in
            isPermitted = authorized
        }
        return isPermitted
    }
}
*/
extension SFSpeechRecognizer {
    /// Asynchronously requests authorization to perform speech recognition.
    static func hasAuthorizationToRecognize() async -> Bool {
        await withCheckedContinuation { continuation in
            requestAuthorization { status in
                continuation.resume(returning: status == .authorized)
            }
        }
    }
}

extension AVAudioSession {
    /// Asynchronously requests permission to record from the microphone.
    func hasPermissionToRecord() async -> Bool {
        await withCheckedContinuation { continuation in
            requestRecordPermission { authorized in
                continuation.resume(returning: authorized)
            }
        }
    }
}
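These wrappers work from any async context, not just the initializer. A quick way to exercise them (the print is illustrative only):

    Task {
        let canRecognize = await SFSpeechRecognizer.hasAuthorizationToRecognize()
        let canRecord = await AVAudioSession.sharedInstance().hasPermissionToRecord()
        print("speech recognition: \(canRecognize), microphone: \(canRecord)")
    }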
// MARK: - Example: calling the code above from a SwiftUI view

struct TranscribingView: View {    // hypothetical view name; adapt to your screen
    @StateObject var speechRecognizer = SpeechRecognizer()

    var body: some View {
        ZStack {
            // Your meeting UI goes here; the live transcript is
            // published as speechRecognizer.transcript.
            Text(speechRecognizer.transcript)
        }
        .onAppear {
            // Start transcribing speech.
            speechRecognizer.reset()
            speechRecognizer.transcribe()
        }
        .onDisappear {
            // Stop transcribing.
            speechRecognizer.stopTranscribing()
        }
    }
}