Last active
October 17, 2024 11:23
-
-
Save willwade/86ef5c0655badc90c2f3fec811c06cde to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import comtypes.client | |
import winreg | |
class SAPI4Driver:
    """Speech driver built on the SAPI 4 ``Speech.VoiceText`` COM object.

    The driver reports its state to ``proxy`` through ``setBusy`` and
    ``notify`` and exposes ``setProperty`` / ``getProperty`` / ``say`` /
    ``stop``.  Voices are discovered once, at construction time, from the
    registry and kept as ``(clsid, voice_name)`` tuples in ``self._voices``.
    """

    # Registry location (under HKEY_LOCAL_MACHINE) that lists SAPI 4 voices.
    _VOICES_KEY_PATH = r"SOFTWARE\Microsoft\Speech\Voices\SV4Voices"

    def __init__(self, proxy):
        """Create the COM object, enumerate voices and pick the first one.

        Args:
            proxy: Object providing ``setBusy(bool)`` and
                ``notify(name, **kwargs)``.
        """
        # Initialize the SAPI 4 VoiceText COM object
        self._tts = comtypes.client.CreateObject("Speech.VoiceText")
        self._proxy = proxy
        self._speaking = False
        self._stopping = False
        self._voices = self._enumerate_sapi4_voices()
        self._select_default_voice()

    def _select_default_voice(self):
        """Select the first enumerated voice, if there is one.

        ``setProperty("voice", ...)`` matches on the voice *name*, so the
        name (not the whole ``(clsid, name)`` tuple) is passed.  With no
        voices installed this is a no-op instead of an ``IndexError``.
        """
        if self._voices:
            self.setProperty("voice", self._voices[0][1])

    def _enumerate_sapi4_voices(self):
        """Return ``[(subkey_name, voice_name), ...]`` read from the registry.

        Each subkey of the voices key contributes one entry whose name is the
        subkey's default value.  A missing voices key yields an empty list,
        and subkeys without a readable default value are skipped.
        """
        voices = []
        try:
            root = winreg.OpenKey(
                winreg.HKEY_LOCAL_MACHINE, self._VOICES_KEY_PATH
            )
        except OSError:
            # Key absent: no SAPI 4 voices are registered on this machine.
            return voices
        with root as key:
            subkey_count = winreg.QueryInfoKey(key)[0]
            for index in range(subkey_count):
                subkey_name = winreg.EnumKey(key, index)
                try:
                    with winreg.OpenKey(key, subkey_name) as subkey:
                        # ``None`` asks for the subkey's default value.
                        voice_name = winreg.QueryValueEx(subkey, None)[0]
                except OSError:
                    continue
                voices.append((subkey_name, voice_name))
        return voices

    def setProperty(self, name, value):
        """Set the ``voice``, ``rate`` or ``volume`` property.

        Args:
            name: ``"voice"`` (a voice name), ``"rate"`` (converted with
                ``int``) or ``"volume"`` (a fraction that is scaled by 100).
            value: New value for the property.

        Raises:
            KeyError: If ``name`` is not one of the supported properties.

        An unknown voice name is ignored silently.
        """
        if name == "voice":
            # Find the voice's CLSID among the enumerated voices.
            # NOTE(review): Register("", clsid) is taken as-is from the
            # original code; confirm it really selects the voice.
            for clsid, voice_name in self._voices:
                if voice_name == value:
                    self._tts.Register("", clsid)
                    break
        elif name == "rate":
            self._tts.Speed = int(value)  # SAPI 4 uses Speed instead of Rate
        elif name == "volume":
            # Round rather than truncate: 0.29 * 100 is 28.999... in binary
            # floating point and must map to 29, not 28.
            self._tts.Volume = int(round(value * 100))
        else:
            raise KeyError(f"Unknown property '{name}' for SAPI 4")

    def getProperty(self, name):
        """Return the ``voices``, ``voice``, ``rate`` or ``volume`` property.

        ``voices`` is the list of enumerated voice names; ``volume`` is the
        COM object's ``Volume`` divided by 100.

        Raises:
            KeyError: If ``name`` is not one of the supported properties.
        """
        if name == "voices":
            return [voice_name for _, voice_name in self._voices]
        if name == "voice":
            return self._tts.GetVoice()
        if name == "rate":
            return self._tts.Speed
        if name == "volume":
            return self._tts.Volume / 100.0
        raise KeyError(f"Unknown property '{name}' for SAPI 4")

    def say(self, text):
        """Mark the proxy busy, announce the utterance and speak ``text``.

        NOTE(review): no ``finished-utterance`` notification is sent from
        here; presumably completion is signaled elsewhere - confirm.
        """
        self._proxy.setBusy(True)
        self._proxy.notify("started-utterance")
        self._speaking = True
        self._tts.Speak(text)

    def stop(self):
        """Stop the current utterance, if any, and clear the busy flag."""
        if self._speaking:
            self._tts.Stop()
            self._speaking = False
            self._proxy.notify("finished-utterance", completed=False)
        self._proxy.setBusy(False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UnifiedSAPIEngine:
    """Facade that routes calls to either a SAPI 5 or a SAPI 4 driver.

    Both drivers are created up front.  SAPI 5 is the default; setting the
    ``voice`` property switches the active driver depending on whether the
    requested voice is one the SAPI 4 driver reports.
    """

    def __init__(self, proxy):
        """Create both drivers with the same ``proxy``; start on SAPI 5."""
        self._sapi5 = SAPI5Driver(proxy)
        self._sapi4 = SAPI4Driver(proxy)
        self._current_driver = self._sapi5  # Default to SAPI 5

    def setProperty(self, name, value):
        """Set a property on the active driver, switching driver for voices.

        When ``name`` is ``"voice"`` the active driver becomes the SAPI 4
        driver if ``value`` is among its voices, otherwise the SAPI 5 driver.
        The property is then forwarded to the active driver.
        """
        if name == "voice":
            # The SAPI 4 driver's "voices" property is already a list of
            # voice-name strings, so test membership directly; indexing each
            # entry would compare against single characters of the names.
            if value in self._sapi4.getProperty("voices"):
                self._current_driver = self._sapi4
            else:
                self._current_driver = self._sapi5
        self._current_driver.setProperty(name, value)

    def getProperty(self, name):
        """Return property ``name`` from the active driver."""
        return self._current_driver.getProperty(name)

    def say(self, text):
        """Speak ``text`` with the active driver."""
        self._current_driver.say(text)

    def stop(self):
        """Stop speech on the active driver."""
        self._current_driver.stop()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import winrt.windows.media.speechsynthesis as speechsynth | |
from winrt.windows.storage.streams import DataReader | |
class UWPDriver:
    """Speech driver built on the WinRT ``SpeechSynthesizer``.

    The driver reports its state to ``proxy`` through ``setBusy`` and
    ``notify``.  Voices are captured once at construction time as
    ``(id, display_name)`` tuples in ``self._voices``.
    """

    def __init__(self, proxy):
        """Create the synthesizer and snapshot the available voices.

        Args:
            proxy: Object providing ``setBusy(bool)`` and
                ``notify(name, **kwargs)``.
        """
        self._synthesizer = speechsynth.SpeechSynthesizer()
        self._proxy = proxy
        self._voices = self._get_voices()
        self._current_voice = None

    def _get_voices(self):
        """Return ``[(voice.id, voice.display_name), ...]`` for all voices."""
        return [
            (voice.id, voice.display_name)
            for voice in speechsynth.SpeechSynthesizer.all_voices()
        ]

    async def _speak_text_async(self, text):
        """Synthesize ``text`` and return the audio as a ``bytearray``.

        Playback is not implemented here; the caller receives the raw bytes
        of the synthesized stream.
        """
        # Synthesize the text to a stream
        stream = await self._synthesizer.synthesize_text_to_stream_async(text)
        reader = DataReader(stream)
        # A DataReader only exposes bytes that were loaded into its buffer,
        # so load the whole stream before reading from it.
        await reader.load_async(stream.size)
        audio_data = bytearray(reader.read_bytes(stream.size))
        reader.detach_stream()
        # Use a method of your choice to play audio data
        # e.g., play using winsound or save to a WAV file
        return audio_data

    def setProperty(self, name, value):
        """Set the ``voice`` property; ``rate`` and ``volume`` are ignored.

        Args:
            name: ``"voice"`` (matched against voice display names),
                ``"rate"`` or ``"volume"``.
            value: New value for the property.

        Raises:
            KeyError: If ``name`` is not one of the names above.

        An unknown voice name is ignored silently.
        """
        if name == "voice":
            for voice in speechsynth.SpeechSynthesizer.all_voices():
                if voice.display_name == value:
                    self._synthesizer.voice = voice
                    self._current_voice = voice
                    break
        elif name in ("rate", "volume"):
            # Accepted but not applied.
            # NOTE(review): SpeechSynthesizerOptions appears to expose
            # speaking-rate and audio-volume settings on newer Windows
            # builds - consider wiring them up after confirming.
            pass
        else:
            raise KeyError(f"Unknown property '{name}' for UWP")

    def getProperty(self, name):
        """Return the ``voices`` or ``voice`` property.

        ``voices`` is the list of display names captured at construction;
        ``voice`` is the display name of the voice last selected through
        ``setProperty``, or ``None`` if none was selected.

        Raises:
            KeyError: For any other name (including ``rate``/``volume``).
        """
        if name == "voices":
            return [display_name for _, display_name in self._voices]
        if name == "voice":
            if self._current_voice:
                return self._current_voice.display_name
            return None
        raise KeyError(f"Unknown property '{name}' for UWP")

    def say(self, text):
        """Synthesize ``text`` synchronously and notify the proxy.

        The proxy is always told the utterance finished and is always
        marked idle again, even when synthesis raises; in that case
        ``completed`` is ``False`` and the exception propagates.
        """
        self._proxy.setBusy(True)
        self._proxy.notify("started-utterance")
        completed = False
        try:
            asyncio.run(self._speak_text_async(text))
            completed = True
        finally:
            self._proxy.notify("finished-utterance", completed=completed)
            self._proxy.setBusy(False)

    def stop(self):
        """Do nothing; stopping is not implemented for this driver."""
        # Implement stop if possible or use a workaround
        pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment