import SwiftUI
import Speech
import AVFoundation
import Observation
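
// Note: the app's Info.plist must include NSSpeechRecognitionUsageDescription
// and NSMicrophoneUsageDescription entries, or the authorization request and
// microphone capture below will fail at runtime.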
/// Streams live microphone audio through SFSpeechRecognizer and publishes
/// the running transcription for SwiftUI to observe.
@Observable
class SpeechRecognizer {
    var transcription: String = ""
    var isTranscribing: Bool = false

    private var speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private var audioEngine = AVAudioEngine()
    /// Requests speech-recognition authorization and, if granted, starts recording.
    func startTranscribing() {
        SFSpeechRecognizer.requestAuthorization { authStatus in
            switch authStatus {
            case .authorized:
                // The authorization callback can arrive on a background queue,
                // so hop to the main queue before touching observed state.
                DispatchQueue.main.async {
                    self.isTranscribing = true
                }
                self.startRecording()
            case .denied, .restricted, .notDetermined:
                print("Speech recognition not authorized")
            @unknown default:
                fatalError("Unknown authorization status")
            }
        }
    }
    private func startRecording() {
        // Cancel any in-flight task before starting a new one.
        recognitionTask?.cancel()
        recognitionTask = nil

        // Configure the shared audio session for recording.
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            print("Failed to set up audio session: \(error)")
            return
        }

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            print("Unable to create a recognition request")
            return
        }
        // Report partial results so the transcription updates as the user speaks.
        recognitionRequest.shouldReportPartialResults = true

        // Tap the microphone input and feed captured buffers into the request.
        let inputNode = audioEngine.inputNode
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.removeTap(onBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, _ in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("Audio engine couldn't start: \(error)")
            return
        }

        recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { result, error in
            if let result = result {
                // Recognition callbacks arrive on a background queue; publish on main.
                DispatchQueue.main.async {
                    self.transcription = result.bestTranscription.formattedString
                }
            }
            if error != nil || result?.isFinal == true {
                self.stopTranscribing()
            }
        }
    }
    /// Stops the audio engine and tears down the recognition request and task.
    func stopTranscribing() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        recognitionRequest?.endAudio()
        recognitionTask?.cancel()
        recognitionRequest = nil
        recognitionTask = nil
        DispatchQueue.main.async {
            self.isTranscribing = false
        }
    }
}
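
// SwiftUI front end: @State keeps a single SpeechRecognizer alive for the
// view's lifetime, and the @Observable macro lets the Text and Buttons below
// refresh whenever transcription or isTranscribing changes.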
struct ContentView: View {
    @State private var speechRecognizer = SpeechRecognizer()

    var body: some View {
        VStack {
            Text(speechRecognizer.transcription)
                .padding()
            HStack {
                Button(action: {
                    speechRecognizer.startTranscribing()
                }) {
                    Text("Start Transcribing")
                        .padding()
                        .background(speechRecognizer.isTranscribing ? Color.gray : Color.blue)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(speechRecognizer.isTranscribing)

                Button(action: {
                    speechRecognizer.stopTranscribing()
                }) {
                    Text("Stop Transcribing")
                        .padding()
                        .background(speechRecognizer.isTranscribing ? Color.red : Color.gray)
                        .foregroundColor(.white)
                        .cornerRadius(10)
                }
                .disabled(!speechRecognizer.isTranscribing)
            }
        }
        .padding()
    }
}
struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
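
// A minimal sketch of an app entry point hosting ContentView, assuming an
// iOS 17+ deployment target (required by the @Observable macro above) and
// that no other @main type exists in the project. The name TranscriberApp
// is illustrative, not part of the original gist.
@main
struct TranscriberApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}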