@masadchattha
Created March 7, 2023 19:36
Transcribing Live Audio Speech to Text in SwiftUI.
/*
Transcribing speech to text
Captures and logs meeting transcripts.
You’ll request access to device hardware like the microphone and
integrate the Speech framework to transcribe live audio to text.
*/
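// Note: the app's Info.plist must include usage descriptions for speech
// recognition and the microphone (NSSpeechRecognitionUsageDescription and
// NSMicrophoneUsageDescription) before iOS will present the permission
// prompts that the code below relies on.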
import AVFoundation
import Foundation
import Speech
import SwiftUI
/// A helper for transcribing speech to text using SFSpeechRecognizer and AVAudioEngine.
class SpeechRecognizer: ObservableObject {
    enum RecognizerError: Error {
        case nilRecognizer
        case notAuthorizedToRecognize
        case notPermittedToRecord
        case recognizerIsUnavailable

        var message: String {
            switch self {
            case .nilRecognizer: return "Can't initialize speech recognizer"
            case .notAuthorizedToRecognize: return "Not authorized to recognize speech"
            case .notPermittedToRecord: return "Not permitted to record audio"
            case .recognizerIsUnavailable: return "Recognizer is unavailable"
            }
        }
    }

    @Published var transcript: String = ""

    private var audioEngine: AVAudioEngine?
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?
    private let recognizer: SFSpeechRecognizer?
    /**
     Initializes a new speech recognizer. If this is the first time you've used the class, it
     requests access to the speech recognizer and the microphone.
     */
    init() {
        // SFSpeechRecognizer() uses the device's current locale and is nil
        // when that locale isn't supported for recognition.
        recognizer = SFSpeechRecognizer()

        Task(priority: .background) {
            do {
                guard recognizer != nil else {
                    throw RecognizerError.nilRecognizer
                }
                guard await SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                    throw RecognizerError.notAuthorizedToRecognize
                }
                guard await AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                    throw RecognizerError.notPermittedToRecord
                }
            } catch {
                speakError(error)
            }
        }
    }
    // An earlier, synchronous version of init(), kept for reference. It doesn't
    // work: the permission callbacks fire asynchronously, so these guards would
    // read the results before the user has responded (hence the async/await
    // extensions at the bottom of the file).
    /*
    init() {
        recognizer = SFSpeechRecognizer()
        do {
            guard recognizer != nil else {
                throw RecognizerError.nilRecognizer
            }
            guard SFSpeechRecognizer.hasAuthorizationToRecognize() else {
                throw RecognizerError.notAuthorizedToRecognize
            }
            guard AVAudioSession.sharedInstance().hasPermissionToRecord() else {
                throw RecognizerError.notPermittedToRecord
            }
        } catch {
            speakError(error)
        }
    }
    */

    deinit {
        reset()
    }
    /**
     Begin transcribing audio.

     Creates an `SFSpeechRecognitionTask` that transcribes speech to text until you call `stopTranscribing()`.
     The resulting transcription is continuously written to the published `transcript` property.
     */
    func transcribe() {
        DispatchQueue(label: "Speech Recognizer Queue", qos: .background).async { [weak self] in
            guard let self = self, let recognizer = self.recognizer, recognizer.isAvailable else {
                self?.speakError(RecognizerError.recognizerIsUnavailable)
                return
            }

            do {
                let (audioEngine, request) = try Self.prepareEngine()
                self.audioEngine = audioEngine
                self.request = request

                self.task = recognizer.recognitionTask(with: request) { result, error in
                    let receivedFinalResult = result?.isFinal ?? false
                    let receivedError = error != nil
                    // Stop capturing audio once the recognizer finishes or fails.
                    if receivedFinalResult || receivedError {
                        audioEngine.stop()
                        audioEngine.inputNode.removeTap(onBus: 0)
                    }
                    if let result = result {
                        self.speak(result.bestTranscription.formattedString)
                    }
                }
            } catch {
                self.reset()
                self.speakError(error)
            }
        }
    }
    /// Stop transcribing audio.
    func stopTranscribing() {
        reset()
    }

    /// Reset the speech recognizer, tearing down the audio engine and recognition task.
    func reset() {
        task?.cancel()
        audioEngine?.stop()
        audioEngine = nil
        request = nil
        task = nil
    }
    private static func prepareEngine() throws -> (AVAudioEngine, SFSpeechAudioBufferRecognitionRequest) {
        let audioEngine = AVAudioEngine()

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        // Configure the shared audio session for recording.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Feed microphone buffers into the recognition request as they arrive.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            request.append(buffer)
        }
        audioEngine.prepare()
        try audioEngine.start()

        return (audioEngine, request)
    }
    /// Publishes the latest transcription. Dispatches to the main queue because
    /// `transcript` is a published property observed by the UI.
    private func speak(_ message: String) {
        DispatchQueue.main.async {
            self.transcript = message
        }
    }

    private func speakError(_ error: Error) {
        var errorMessage = ""
        if let error = error as? RecognizerError {
            errorMessage += error.message
        } else {
            errorMessage += error.localizedDescription
        }
        DispatchQueue.main.async {
            self.transcript = "<< \(errorMessage) >>"
        }
    }
}
/*
// Synchronous versions of the permission helpers, kept for reference. These
// don't work: the completion handlers run after the functions have already
// returned, so the flags are read before they're ever set.
extension SFSpeechRecognizer {
    static func hasAuthorizationToRecognize() -> Bool {
        var isAuthorized = false
        requestAuthorization { status in
            isAuthorized = status == .authorized
        }
        return isAuthorized
    }
}

extension AVAudioSession {
    func hasPermissionToRecord() -> Bool {
        var isPermitted = false
        requestRecordPermission { authorized in
            isPermitted = authorized
        }
        return isPermitted
    }
}
*/
/// Bridges the callback-based speech-authorization API into async/await.
extension SFSpeechRecognizer {
    static func hasAuthorizationToRecognize() async -> Bool {
        await withCheckedContinuation { continuation in
            requestAuthorization { status in
                continuation.resume(returning: status == .authorized)
            }
        }
    }
}

/// Bridges the callback-based record-permission API into async/await.
extension AVAudioSession {
    func hasPermissionToRecord() async -> Bool {
        await withCheckedContinuation { continuation in
            requestRecordPermission { authorized in
                continuation.resume(returning: authorized)
            }
        }
    }
}
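
// A usage sketch (startIfAuthorized is an illustrative helper, not part of the
// original gist): checking both permissions up front with the async helpers
// above before starting a transcription session.
func startIfAuthorized(_ recognizer: SpeechRecognizer) {
    Task {
        let canRecognize = await SFSpeechRecognizer.hasAuthorizationToRecognize()
        let canRecord = await AVAudioSession.sharedInstance().hasPermissionToRecord()
        if canRecognize && canRecord {
            recognizer.transcribe()
        }
    }
}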
// MARK: - Example of calling the code above for speech recognition
// (Wrapped in a View struct so the snippet compiles; the MeetingView name
// and empty ZStack are placeholders.)

struct MeetingView: View {
    @StateObject var speechRecognizer = SpeechRecognizer()

    var body: some View {
        ZStack {
            // Your view content here.
        }
        .onAppear {
            // Start transcribing speech.
            speechRecognizer.reset()
            speechRecognizer.transcribe()
        }
        .onDisappear {
            // Stop transcribing.
            speechRecognizer.stopTranscribing()
        }
    }
}
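
A minimal sketch of rendering the live transcript (the TranscriptView name and layout are illustrative additions, not part of the original gist):

// MARK: - Displaying the live transcript (illustrative)

struct TranscriptView: View {
    @StateObject private var speechRecognizer = SpeechRecognizer()

    var body: some View {
        ScrollView {
            // `transcript` is @Published, so this Text updates as partial
            // results stream in from the recognizer.
            Text(speechRecognizer.transcript)
                .padding()
        }
        .onAppear {
            speechRecognizer.reset()
            speechRecognizer.transcribe()
        }
        .onDisappear {
            speechRecognizer.stopTranscribing()
        }
    }
}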