Skip to content

Instantly share code, notes, and snippets.

@willwade
Last active October 17, 2024 11:08
Show Gist options
  • Save willwade/93e709147d7ce9a5f80a3c3944f8e331 to your computer and use it in GitHub Desktop.
Save willwade/93e709147d7ce9a5f80a3c3944f8e331 to your computer and use it in GitHub Desktop.
import objc
from AVFoundation import AVSpeechSynthesizer, AVSpeechUtterance, AVSpeechSynthesisVoice, AVAudioEngine, AVAudioFile, AVAudioSession
from Foundation import NSURL, NSObject
class AVSpeechDriver(NSObject):
    """Text-to-speech driver built on ``AVSpeechSynthesizer``.

    The driver reports progress to a *proxy* object through
    ``proxy.setBusy(bool)`` and ``proxy.notify(topic, **kwargs)`` using the
    topics ``"started-utterance"``, ``"started-word"`` and
    ``"finished-utterance"``.

    Instances are created with ``AVSpeechDriver.alloc().initWithProxy(proxy)``.
    All per-instance state is set up there rather than in ``__init__``,
    because the Objective-C style ``alloc().init...()`` construction path
    does not go through ``__init__``.
    """

    @objc.python_method
    def initWithProxy(self, proxy):
        """Initialise the driver and attach it to *proxy*.

        Returns the initialised instance, or ``None`` when the Objective-C
        superclass initialiser fails.
        """
        self = objc.super(AVSpeechDriver, self).init()
        if self is None:
            return None
        self._proxy = proxy
        # Text of the utterance in flight; sliced by the word callback.
        self._current_text = ""
        # True between "started-utterance" and "finished-utterance" so the
        # completion bookkeeping runs exactly once per utterance.
        self._utterance_active = False
        # Destination path while save_to_file() is rendering, else None.
        self._output_path = None
        # AVAudioFile being written; created lazily from the first buffer.
        self._audio_file = None
        self._tts = AVSpeechSynthesizer.alloc().init()
        self._tts.setDelegate_(self)
        return self

    @objc.python_method
    def _begin_utterance(self, text, ssml):
        """Mark the driver busy and build the utterance for *text*.

        Returns the configured ``AVSpeechUtterance``, or ``None`` when an
        SSML utterance could not be created (in which case the utterance is
        already reported as finished with ``completed=False``).
        """
        self._proxy.setBusy(True)
        self._current_text = text
        self._utterance_active = True
        self._proxy.notify("started-utterance")
        if ssml:
            try:
                utterance = AVSpeechUtterance.alloc().initWithSSMLRepresentation_(text)
            except AttributeError:
                # The SSML initialiser is missing on OS releases that
                # predate it; treat that like any other SSML failure.
                utterance = None
            if utterance is None:
                print("Failed to create SSML utterance")
                # Balance the "started-utterance" above and clear busy.
                self._finish_utterance(False)
                return None
        else:
            utterance = AVSpeechUtterance.speechUtteranceWithString_(text)
        # Objective-C properties must be set through their setter methods;
        # plain attribute assignment does not reach the native object.
        utterance.setVoice_(AVSpeechSynthesisVoice.voiceWithLanguage_("en-US"))
        utterance.setRate_(0.5)
        return utterance

    @objc.python_method
    def _close_output(self):
        """Forget the output file so no further buffers are written."""
        self._output_path = None
        # Dropping the last reference releases the AVAudioFile.
        self._audio_file = None

    @objc.python_method
    def _finish_utterance(self, completed):
        """Finalise the current utterance once and notify the proxy.

        Safe to call more than once: only the first call after
        ``_begin_utterance`` emits "finished-utterance" and clears busy.
        """
        # Release the file first so it is no longer held when listeners
        # are told the utterance has finished.
        self._close_output()
        if not self._utterance_active:
            return
        self._utterance_active = False
        self._proxy.notify("finished-utterance", completed=completed)
        self._proxy.setBusy(False)

    @objc.python_method
    def _write_buffer(self, buffer):
        """Append one synthesized audio buffer to the output file.

        Used as the buffer callback of ``writeUtterance_toBufferCallback_``.
        NOTE(review): the synthesizer may invoke this off the main thread;
        confirm the proxy tolerates notifications from that thread.
        """
        if self._output_path is None:
            # Rendering was stopped or already finalised; drop the audio.
            return
        if buffer.frameLength() == 0:
            # A zero-length buffer marks the end of the rendered audio.
            self._finish_utterance(True)
            return
        if self._audio_file is None:
            # Create the file from the first buffer so its format matches
            # exactly what the synthesizer produces.
            audio_format = buffer.format()
            url = NSURL.fileURLWithPath_(self._output_path)
            # Passing None for the NSError out-parameter makes PyObjC
            # return a (result, error) tuple.
            audio_file, error = (
                AVAudioFile.alloc()
                .initForWriting_settings_commonFormat_interleaved_error_(
                    url,
                    audio_format.settings(),
                    audio_format.commonFormat(),
                    audio_format.isInterleaved(),
                    None,
                )
            )
            if audio_file is None:
                print(f"Failed to open audio file for writing: {error}")
                self._finish_utterance(False)
                return
            self._audio_file = audio_file
        ok, error = self._audio_file.writeFromBuffer_error_(buffer, None)
        if not ok:
            print(f"Failed to write audio buffer: {error}")

    @objc.python_method
    def _word_at(self, start, length):
        """Return the slice of the current text for a UTF-16 range.

        ``NSRange`` values count UTF-16 code units while Python ``str``
        indexes code points, so the text is sliced in its UTF-16 encoding to
        stay correct when it contains characters outside the BMP.
        """
        encoded = self._current_text.encode("utf-16-le", "surrogatepass")
        return encoded[2 * start:2 * (start + length)].decode("utf-16-le", "replace")

    @objc.python_method
    def say(self, text, ssml=False):
        """Speak *text* aloud.

        When *ssml* is true, *text* is interpreted as an SSML document.
        """
        utterance = self._begin_utterance(text, ssml)
        if utterance is None:
            return
        self._tts.speakUtterance_(utterance)

    @objc.python_method
    def save_to_file(self, text, filename, ssml=False):
        """Render *text* to the audio file *filename* instead of speaking it.

        The synthesizer delivers the rendered audio in buffers, which are
        appended to the file as they arrive. (Tapping an audio engine's
        input node would capture the input device, not the synthesizer.)
        """
        utterance = self._begin_utterance(text, ssml)
        if utterance is None:
            return
        self._audio_file = None
        self._output_path = filename
        self._tts.writeUtterance_toBufferCallback_(utterance, self._write_buffer)

    def stop(self):
        """Stop speaking immediately and abandon any file being written."""
        if self._tts.isSpeaking():
            # 0 is AVSpeechBoundaryImmediate. The cancel delegate callback
            # then reports the utterance as finished.
            self._tts.stopSpeakingAtBoundary_(0)
            self._close_output()
        elif self._utterance_active:
            # Nothing is audibly speaking (for example a file render), so
            # finish the bookkeeping here instead of waiting for a callback.
            self._finish_utterance(False)

    def speechSynthesizer_didFinishSpeechUtterance_(self, synthesizer, utterance):
        """Delegate callback: the utterance was spoken to the end."""
        self._finish_utterance(True)

    def speechSynthesizer_didCancelSpeechUtterance_(self, synthesizer, utterance):
        """Delegate callback: the utterance was cancelled before the end."""
        self._finish_utterance(False)

    def speechSynthesizer_didFinishSpeaking_(self, synthesizer, success):
        """Legacy completion entry point kept for existing callers.

        ``AVSpeechSynthesizer`` reports completion through the
        ``didFinishSpeechUtterance`` / ``didCancelSpeechUtterance`` callbacks
        above; this method only forwards *success* to the same bookkeeping.
        """
        self._finish_utterance(bool(success))

    def speechSynthesizer_willSpeakRangeOfSpeechString_utterance_(self, synthesizer, characterRange, utterance):
        """Delegate callback: notify the proxy of the word about to be spoken."""
        start, length = characterRange.location, characterRange.length
        current_word = self._word_at(start, length)
        self._proxy.notify("started-word", name=current_word, location=start, length=length)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment