Skip to content

Instantly share code, notes, and snippets.

@willwade
Last active October 17, 2024 11:23
Show Gist options
  • Save willwade/86ef5c0655badc90c2f3fec811c06cde to your computer and use it in GitHub Desktop.
Save willwade/86ef5c0655badc90c2f3fec811c06cde to your computer and use it in GitHub Desktop.
import comtypes.client
import winreg
class SAPI4Driver:
    """Speech driver that talks to the SAPI 4 ``Speech.VoiceText`` COM object.

    Voices are discovered from the registry as ``(subkey_name, voice_name)``
    pairs. Driver state changes are reported through the ``proxy`` object via
    its ``setBusy`` and ``notify`` methods.

    NOTE(review): the COM members used below (``Register``, ``Speed``,
    ``Volume``, ``GetVoice``, ``Speak``, ``Stop``) are kept exactly as the
    original code called them; they have not been checked against the SAPI 4
    automation reference -- confirm before relying on them.
    """

    def __init__(self, proxy):
        """Create the COM object, enumerate voices and select a default voice.

        Args:
            proxy: Object that receives ``setBusy(bool)`` and
                ``notify(topic, **kwargs)`` calls from this driver.
        """
        # Initialize the SAPI 4 VoiceText COM object
        self._tts = comtypes.client.CreateObject("Speech.VoiceText")
        self._proxy = proxy
        self._speaking = False
        self._stopping = False
        self._voices = self._enumerate_sapi4_voices()
        if self._voices:
            # setProperty("voice", ...) matches on the voice *name*, so hand it
            # the name and not the whole (subkey_name, voice_name) tuple.
            self.setProperty("voice", self._voices[0][1])

    def _enumerate_sapi4_voices(self):
        """Return the SAPI 4 voices found in the registry.

        Returns:
            A list of ``(subkey_name, voice_name)`` tuples, where
            ``voice_name`` is the default value of each subkey. The list is
            empty when the voices key does not exist.
        """
        voices = []
        key_path = r"SOFTWARE\Microsoft\Speech\Voices\SV4Voices"
        try:
            voices_key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key_path)
        except FileNotFoundError:
            # No voices key in the registry: report "no voices" instead of
            # failing the whole constructor.
            return voices
        with voices_key as key:
            subkey_count = winreg.QueryInfoKey(key)[0]
            for index in range(subkey_count):
                subkey_name = winreg.EnumKey(key, index)
                with winreg.OpenKey(key, subkey_name) as subkey:
                    # Passing None reads the subkey's default (unnamed) value.
                    voice_name = winreg.QueryValueEx(subkey, None)[0]
                voices.append((subkey_name, voice_name))
        return voices

    def setProperty(self, name, value):
        """Set a driver property.

        Args:
            name: ``"voice"``, ``"rate"`` or ``"volume"``.
            value: For ``"voice"`` a voice name as listed by
                ``getProperty("voices")`` (unknown names are ignored); for
                ``"rate"`` anything ``int()`` accepts; for ``"volume"`` a
                number that is scaled by 100 before being stored.

        Raises:
            KeyError: If ``name`` is not one of the supported properties.
        """
        if name == "voice":
            # Look up the registry subkey name recorded for this voice name;
            # only the first match is used.
            clsid = next(
                (key for key, voice_name in self._voices if voice_name == value),
                None,
            )
            if clsid is not None:
                self._tts.Register("", clsid)  # Set the SAPI 4 voice by CLSID
        elif name == "rate":
            self._tts.Speed = int(value)  # SAPI 4 uses Speed instead of Rate
        elif name == "volume":
            # Round rather than truncate: 0.29 * 100 is 28.999... in binary
            # floating point and must become 29, not 28.
            self._tts.Volume = round(float(value) * 100)
        else:
            raise KeyError(f"Unknown property '{name}' for SAPI 4")

    def getProperty(self, name):
        """Return the current value of a driver property.

        Args:
            name: ``"voices"`` (list of known voice names), ``"voice"``,
                ``"rate"`` or ``"volume"`` (stored volume divided by 100).

        Raises:
            KeyError: If ``name`` is not one of the supported properties.
        """
        if name == "voices":
            # Return a list of available SAPI 4 voices
            return [voice_name for _, voice_name in self._voices]
        if name == "voice":
            return self._tts.GetVoice()
        if name == "rate":
            return self._tts.Speed
        if name == "volume":
            return self._tts.Volume / 100.0
        raise KeyError(f"Unknown property '{name}' for SAPI 4")

    def say(self, text):
        """Mark the driver busy, announce the utterance and start speaking.

        NOTE(review): nothing in this class clears the busy flag or sends
        "finished-utterance" after a normal completion; only ``stop`` does.
        Whether ``Speak`` blocks until done is not visible here -- confirm.
        """
        self._proxy.setBusy(True)
        self._proxy.notify("started-utterance")
        self._speaking = True
        self._tts.Speak(text)

    def stop(self):
        """Interrupt speech if an utterance was started and not yet stopped."""
        if not self._speaking:
            return
        self._tts.Stop()
        self._speaking = False
        self._proxy.notify("finished-utterance", completed=False)
        self._proxy.setBusy(False)
class UnifiedSAPIEngine:
    """Facade that routes calls to either a SAPI 5 or a SAPI 4 driver.

    The SAPI 5 driver is active by default. Selecting a voice whose name is
    reported by the SAPI 4 driver switches to SAPI 4; selecting any other
    voice switches back to SAPI 5.

    NOTE(review): ``SAPI5Driver`` is not defined in the visible part of this
    file; it is assumed to be provided elsewhere.
    """

    def __init__(self, proxy):
        """Build both underlying drivers, sharing the same ``proxy``."""
        self._sapi5 = SAPI5Driver(proxy)
        self._sapi4 = SAPI4Driver(proxy)
        self._current_driver = self._sapi5  # Default to SAPI 5

    def setProperty(self, name, value):
        """Set a property on the active driver, switching driver for voices.

        Args:
            name: Property name; ``"voice"`` additionally selects which
                driver becomes active.
            value: Property value, forwarded unchanged to the driver.
        """
        if name == "voice":
            # getProperty("voices") on the SAPI 4 driver already yields plain
            # voice names, so test membership on the names themselves.
            if value in self._sapi4.getProperty("voices"):
                self._current_driver = self._sapi4
            else:
                self._current_driver = self._sapi5
        self._current_driver.setProperty(name, value)

    def getProperty(self, name):
        """Return property ``name`` as reported by the active driver."""
        return self._current_driver.getProperty(name)

    def say(self, text):
        """Speak ``text`` with the active driver."""
        self._current_driver.say(text)

    def stop(self):
        """Stop speech on the active driver."""
        self._current_driver.stop()
import asyncio
import winrt.windows.media.speechsynthesis as speechsynth
from winrt.windows.storage.streams import DataReader
class UWPDriver:
    """Speech driver built on the WinRT ``SpeechSynthesizer`` class.

    Driver state changes are reported through the ``proxy`` object via its
    ``setBusy`` and ``notify`` methods. Audio playback and ``stop`` are not
    implemented yet; ``"rate"`` and ``"volume"`` are accepted by
    ``setProperty`` but currently ignored.

    NOTE(review): the exact call shapes of the Python WinRT projection used
    here (``all_voices()``, ``read_bytes(size)``) are kept as the original
    code wrote them and should be confirmed against the installed package.
    """

    def __init__(self, proxy):
        """Create the synthesizer and cache the list of installed voices.

        Args:
            proxy: Object that receives ``setBusy(bool)`` and
                ``notify(topic, **kwargs)`` calls from this driver.
        """
        self._synthesizer = speechsynth.SpeechSynthesizer()
        self._proxy = proxy
        self._voices = self._get_voices()
        self._current_voice = None

    def _get_voices(self):
        """Return ``(id, display_name)`` tuples for every available voice."""
        return [
            (voice.id, voice.display_name)
            for voice in speechsynth.SpeechSynthesizer.all_voices()
        ]

    async def _speak_text_async(self, text):
        """Synthesize ``text`` and return the resulting audio bytes.

        Returns:
            A ``bytearray`` with the contents of the synthesized stream.
            Nothing is played back yet.
        """
        # Synthesize the text to a stream
        stream = await self._synthesizer.synthesize_text_to_stream_async(text)
        reader = DataReader(stream)
        # A DataReader can only hand out bytes that were loaded into its
        # buffer first, so pull the whole stream in before reading.
        await reader.load_async(stream.size)
        audio_data = bytearray(reader.read_bytes(stream.size))
        reader.detach_stream()
        # TODO: play the audio data (e.g. with winsound) or save it to a WAV
        # file; until then the bytes are only returned to the caller.
        return audio_data

    def setProperty(self, name, value):
        """Set a driver property.

        Args:
            name: ``"voice"``, ``"rate"`` or ``"volume"``.
            value: For ``"voice"`` the display name of an installed voice
                (unknown names are ignored). Ignored for ``"rate"`` and
                ``"volume"``.

        Raises:
            KeyError: If ``name`` is not one of the names listed above.
        """
        if name == "voice":
            for voice in speechsynth.SpeechSynthesizer.all_voices():
                if voice.display_name == value:
                    self._synthesizer.voice = voice
                    self._current_voice = voice
                    break
        elif name in ("rate", "volume"):
            # Accepted for interface compatibility but not applied.
            # NOTE(review): newer Windows builds appear to expose speaking
            # rate and audio volume through the synthesizer's options object;
            # wiring that up is left as a follow-up -- confirm availability.
            pass
        else:
            raise KeyError(f"Unknown property '{name}' for UWP")

    def getProperty(self, name):
        """Return the current value of a driver property.

        Args:
            name: ``"voices"`` (cached display names) or ``"voice"`` (display
                name of the voice chosen via ``setProperty``, or ``None``).

        Raises:
            KeyError: For any other name, including ``"rate"``/``"volume"``.
        """
        if name == "voices":
            return [display_name for _, display_name in self._voices]
        if name == "voice":
            return self._current_voice.display_name if self._current_voice else None
        raise KeyError(f"Unknown property '{name}' for UWP")

    def say(self, text):
        """Synthesize ``text`` synchronously, reporting progress to the proxy.

        The proxy always receives a "finished-utterance" notification and has
        its busy flag cleared, even when synthesis raises; in that case
        ``completed`` is ``False`` and the exception propagates to the caller.
        """
        self._proxy.setBusy(True)
        self._proxy.notify("started-utterance")
        completed = False
        try:
            asyncio.run(self._speak_text_async(text))
            completed = True
        finally:
            # Never leave the proxy stuck in the busy state after a failure.
            self._proxy.notify("finished-utterance", completed=completed)
            self._proxy.setBusy(False)

    def stop(self):
        """Do nothing; stopping an utterance is not implemented yet."""
        # Implement stop if possible or use a workaround
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment