dedmen/gist:575c0d597dc2c0c3b19ea9c6c64d1794

## gistfile1.txt
//Header File:

#pragma once

struct ISpVoice;
struct ISpStreamFormat;

namespace Speech
{
	/**
	\brief Text-to-speech wrapper around a SAPI ISpVoice.

	Speech is queued with SpeakXML() / SpeakSSML(). Tick() has to be called regularly, it is what invokes the On* handlers.
	*/
	class TextToSpeech
	{
		friend class TextToSpeechClass;
		std::shared_ptr<ISpVoice> m_Voice; //!< Voice used for all speech, the methods return early while it is empty
		bool m_IsSpeaking = false; //!< Set once speech has been queued, cleared by Tick() when it completed
		bool m_HasPendingTTSNotifications = false; //!< Set by OnTTSNotification()
		std::shared_ptr<ISpStreamFormat> m_NullStream; //!< Output used while audio is disabled, created on first use
	public:
		TextToSpeech();
		/**
		\brief The TextToSpeech instance needs to stay alive until speech is over https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ms717077(v=vs.85)
		\param stuff UTF-8 encoded SAPI XML text to speak
		*/
		void SpeakXML(std::string_view stuff);

		/**
		\brief The TextToSpeech instance needs to stay alive until speech is over https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup?tabs=csharp
		\param stuff UTF-8 encoded SSML text to speak
		*/
		void SpeakSSML(std::string_view stuff);

		/**
		\brief Required to be called for events to fire
		*/
		void Tick();

		/**
		\brief Switches between the default audio device (true) and a stream that swallows the audio (false)
		*/
		void SetAudioEnabled(bool enabled);

		/**
		\brief Called during speech to update visual face/mouth state https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ms717289(v=vs.85)
		*/
		void OnMouthStateChanged(int viseme); //#TODO implement this as a event that's forwarded to script via extension callback

		//! https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ee431828(v=vs.85) phoneme ID
		void OnPhoneme(int durationMilliseconds, int phoneme); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called when whole queued speech has completed
		*/
		void OnSpeechDone(); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called when a XML bookmark tag has been hit
		*/
		void OnBookmark(std::string_view name); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called by the voice notification callback, sets m_HasPendingTTSNotifications
		*/
		void OnTTSNotification();
	}; // A class definition has to be terminated with a semicolon
}


//Source file:
#include "TextToSpeech.h"


#include "sdkddkver.h"
// Fix undefined defines errors https://developercommunity.visualstudio.com/t/several-warnings-in-windows-sdk-100177630-in-windo/435362
#define _WIN32_WINNT_WIN10_TH2 NTDDI_WIN10_TH2
#define _WIN32_WINNT_WIN10_RS1 NTDDI_WIN10_RS1
#define _WIN32_WINNT_WIN10_RS2 NTDDI_WIN10_RS2
#define _WIN32_WINNT_WIN10_RS3 NTDDI_WIN10_RS3
#define _WIN32_WINNT_WIN10_RS4 NTDDI_WIN10_RS4
#define _WIN32_WINNT_WIN10_RS5 NTDDI_WIN10_RS5

// warning C4996: 'GetVersionExW': was declared deprecated
#pragma warning(disable: 4996)
#include "sphelper.h"


namespace Speech
{
	/**
	\brief Notification callback that the constructor registers with the voice.

	\param wParam Address of the TextToSpeech instance to notify (the constructor passes `this`)
	\param lParam Not used
	*/
	void __stdcall SPNOTIFYCALLBACK(WPARAM wParam, LPARAM lParam)
	{
		TextToSpeech* const owner = reinterpret_cast<TextToSpeech*>(wParam);
		owner->OnTTSNotification();
	}

	/**
	\brief Initializes COM, creates the SAPI voice and subscribes to phoneme, viseme and bookmark events.

	If COM or the voice cannot be set up, m_Voice stays empty and the other methods return early.
	*/
	TextToSpeech::TextToSpeech()
	{
		// NOTE(review): nothing visible here balances this with CoUninitialize (the class declares no destructor) - confirm
		if (FAILED(CoInitialize(NULL)))
		{
			return; //#TODO log error
		}

		//Initialize SAPI
		// CoCreateInstance returns an already AddRef'ed raw pointer through an out-parameter.
		// std::shared_ptr cannot hand out its internal pointer for that, so receive it in a local first.
		ISpVoice* rawVoice = nullptr;
		const HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, reinterpret_cast<void**>(&rawVoice));
		if (FAILED(hr) || !rawVoice)
		{
			//#TODO log error
			return; // No voice: the calls below would dereference a null pointer
		}

		// COM objects are given up through Release(), never through delete
		m_Voice.reset(rawVoice, [](ISpVoice* voice) { voice->Release(); });

		// `this` travels through wParam and is cast back in SPNOTIFYCALLBACK
		// NOTE(review): this callback presumably only fires while the registering thread pumps window messages - confirm; Tick() polls regardless
		if (FAILED(m_Voice->SetNotifyCallbackFunction(SPNOTIFYCALLBACK, reinterpret_cast<WPARAM>(this), 0)))
		{
			//#TODO log error
		}

		// Same mask for "interested in" and "queued", Tick() reads the queued events via GetEvents
		const ULONGLONG eventMask = SPFEI(SPEI_PHONEME) | SPFEI(SPEI_VISEME) | SPFEI(SPEI_TTS_BOOKMARK);
		if (FAILED(m_Voice->SetInterest(eventMask, eventMask)))
		{
			//#TODO log error
		}
	}

	void TextToSpeech::SpeakXML(std::string_view stuff)
	{
		if (!m_Voice)
			return; // #TODO error print

#TODO convert string from UTF-8 to UTF-16
		std::wstring speechStr;
		// Utf8ToWideChar(speechStr, stuff.Data());
		m_Voice->Speak( speechStr.Data(), SPF_ASYNC | SPF_IS_XML | SPF_PURGEBEFORESPEAK, NULL);
		m_IsSpeaking = true;
	}

	void TextToSpeech::SpeakSSML(std::string_view stuff)
	{
		if (!m_Voice)
			return; // #TODO error print

#TODO convert string from UTF-8 to UTF-16
		std::wstring speechStr;
		//Utf8ToWideChar(speechStr, stuff.Data());
		m_Voice->Speak( speechStr.Data(), SPF_ASYNC | SPF_PARSE_SSML | SPF_PURGEBEFORESPEAK, NULL);
		m_IsSpeaking = true;
	}

	void TextToSpeech::Tick()
	{
		if (!m_Voice || !m_IsSpeaking)
			return; // #TODO error print

		bool signalled = WaitForSingleObjectEx(m_Voice->SpeakCompleteEvent(), 0, true) != WAIT_TIMEOUT;
		if (signalled)
		{
			m_IsSpeaking = false;
			OnSpeechDone();
		}

		if (true) // m_HasPendingTTSNotifications
		{
			SPEVENT eventItem;
			memset( &eventItem, 0,sizeof(SPEVENT));
			while (m_Voice->GetEvents(1, &eventItem , NULL) == S_OK)
			{
				switch (eventItem.eEventId)
				{
				case SPEI_VISEME:
				{
					auto viseme = static_cast<SPVISEMES>(LOWORD(eventItem.lParam));

					OnMouthStateChanged(viseme);
					break;
				}
				case SPEI_TTS_BOOKMARK:
				{
				    #TODO convert string from UTF-16 to UTF-8
					std::string bookmarkName;
					// WideCharToUtf8(bookmarkName, (wchar_t*)eventItem.lParam);

					OnBookmark(bookmarkName);
					break;
				}
				case SPEI_PHONEME:
				{
					OnPhoneme(HIWORD(eventItem.wParam), LOWORD(eventItem.lParam));
				}
				default:
					break;
				}

				SpClearEvent(&eventItem);
			}
		}
	}

	/**
	\brief Output stream that throws away everything written to it.

	SetAudioEnabled(false) sets it as the voice output. The object is not reference counted:
	AddRef() and Release() never destroy it, its lifetime is controlled by whoever created it.
	*/
	struct NullSPStream : ISpStreamFormat
	{
		HRESULT STDMETHODCALLTYPE QueryInterface(const IID& riid, void** ppvObject) override
		{
			if (!ppvObject)
				return E_POINTER;

			if (riid == __uuidof(IUnknown) || riid == __uuidof(ISpStreamFormat) || riid == __uuidof(IStream))
			{
				*ppvObject = this;
				AddRef();
				return S_OK;
			}

			*ppvObject = nullptr;
			return E_NOINTERFACE;
		}
		ULONG STDMETHODCALLTYPE AddRef() override
		{
			return 1; // Not reference counted, report a constant count
		}
		ULONG STDMETHODCALLTYPE Release() override
		{
			return 1; // Never deletes itself, see struct comment
		}
		HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead) override
		{
			if (pcbRead)
				*pcbRead = 0; // Nothing is ever stored, so nothing can be read

			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Write(const void* pv, ULONG cb, ULONG* pcbWritten) override
		{
			// The data is dropped, but the caller has to be told that all of it was consumed
			if (pcbWritten)
				*pcbWritten = cb;

			return S_OK;
		}
		HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin, ULARGE_INTEGER* plibNewPosition) override
		{
			// No position is tracked, the requested offset is reported back whatever the origin is
			if (plibNewPosition)
				plibNewPosition->QuadPart = dlibMove.QuadPart;

			return S_OK;
		}
		HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER libNewSize) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE CopyTo(IStream* pstm, ULARGE_INTEGER cb, ULARGE_INTEGER* pcbRead, ULARGE_INTEGER* pcbWritten) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Commit(DWORD grfCommitFlags) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Revert() override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Stat(STATSTG* pstatstg, DWORD grfStatFlag) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Clone(IStream** ppstm) override
		{
			return E_NOTIMPL;
		}
		/**
		\brief Reports an 8 bit mono PCM format. The returned WAVEFORMATEX is allocated with CoTaskMemAlloc.
		*/
		HRESULT STDMETHODCALLTYPE GetFormat(GUID* pguidFormatId, WAVEFORMATEX** ppCoMemWaveFormatEx) override
		{
			if (!pguidFormatId || !ppCoMemWaveFormatEx)
				return E_POINTER;

			WAVEFORMATEX* const format = static_cast<WAVEFORMATEX*>(CoTaskMemAlloc(sizeof(WAVEFORMATEX)));
			if (!format)
			{
				*ppCoMemWaveFormatEx = nullptr;
				return E_OUTOFMEMORY;
			}

			format->wFormatTag = WAVE_FORMAT_PCM;
			format->nChannels = 1;
			format->nSamplesPerSec = 44100;
			// NOTE(review): for PCM this is normally nSamplesPerSec * nBlockAlign (44100) - value kept as it was, confirm
			format->nAvgBytesPerSec = 512;
			format->nBlockAlign = 1;
			format->wBitsPerSample = 8;
			format->cbSize = 0;

			*pguidFormatId = SPDFID_WaveFormatEx;
			*ppCoMemWaveFormatEx = format;
			return S_OK;
		}
	};

	void TextToSpeech::SetAudioEnabled(bool enabled)
	{
		if (!m_Voice)
			return; // #TODO error print

		if (enabled)
			m_Voice->SetOutput(nullptr, TRUE); // Default Audio device
		else
		{
			if (!m_NullStream)
				m_NullStream = new NullSPStream();

			auto res = m_Voice->SetOutput(m_NullStream, TRUE);
			__nop();
		}

	}

	void TextToSpeech::OnTTSNotification()
	{
		m_HasPendingTTSNotifications = true;
	}

}
	//Header File:

	#pragma once

	struct ISpVoice;
	struct ISpStreamFormat;

	namespace Speech
	{
	/**
	\brief Text-to-speech wrapper around a SAPI ISpVoice.

	Speech is queued with SpeakXML() / SpeakSSML(). Tick() has to be called regularly, it is what invokes the On* handlers.
	*/
	class TextToSpeech
	{
		friend class TextToSpeechClass;
		std::shared_ptr<ISpVoice> m_Voice; //!< Voice used for all speech, the methods return early while it is empty
		bool m_IsSpeaking = false; //!< Set once speech has been queued, cleared by Tick() when it completed
		bool m_HasPendingTTSNotifications = false; //!< Set by OnTTSNotification()
		std::shared_ptr<ISpStreamFormat> m_NullStream; //!< Output used while audio is disabled, created on first use
	public:
		TextToSpeech();
		/**
		\brief The TextToSpeech instance needs to stay alive until speech is over https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ms717077(v=vs.85)
		\param stuff UTF-8 encoded SAPI XML text to speak
		*/
		void SpeakXML(std::string_view stuff);

		/**
		\brief The TextToSpeech instance needs to stay alive until speech is over https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup?tabs=csharp
		\param stuff UTF-8 encoded SSML text to speak
		*/
		void SpeakSSML(std::string_view stuff);

		/**
		\brief Required to be called for events to fire
		*/
		void Tick();

		/**
		\brief Switches between the default audio device (true) and a stream that swallows the audio (false)
		*/
		void SetAudioEnabled(bool enabled);

		/**
		\brief Called during speech to update visual face/mouth state https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ms717289(v=vs.85)
		*/
		void OnMouthStateChanged(int viseme); //#TODO implement this as a event that's forwarded to script via extension callback

		//! https://learn.microsoft.com/en-us/previous-versions/windows/desktop/ee431828(v=vs.85) phoneme ID
		void OnPhoneme(int durationMilliseconds, int phoneme); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called when whole queued speech has completed
		*/
		void OnSpeechDone(); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called when a XML bookmark tag has been hit
		*/
		void OnBookmark(std::string_view name); //#TODO implement this as a event that's forwarded to script via extension callback

		/**
		\brief Called by the voice notification callback, sets m_HasPendingTTSNotifications
		*/
		void OnTTSNotification();
	}; // A class definition has to be terminated with a semicolon
	}



	//Source file:
	#include "TextToSpeech.h"


	#include "sdkddkver.h"
	// Fix undefined defines errors https://developercommunity.visualstudio.com/t/several-warnings-in-windows-sdk-100177630-in-windo/435362
	#define _WIN32_WINNT_WIN10_TH2 NTDDI_WIN10_TH2
	#define _WIN32_WINNT_WIN10_RS1 NTDDI_WIN10_RS1
	#define _WIN32_WINNT_WIN10_RS2 NTDDI_WIN10_RS2
	#define _WIN32_WINNT_WIN10_RS3 NTDDI_WIN10_RS3
	#define _WIN32_WINNT_WIN10_RS4 NTDDI_WIN10_RS4
	#define _WIN32_WINNT_WIN10_RS5 NTDDI_WIN10_RS5

	// warning C4996: 'GetVersionExW': was declared deprecated
	#pragma warning(disable: 4996)
	#include "sphelper.h"


	namespace Speech
	{
	/**
	\brief Notification callback that the constructor registers with the voice.

	\param wParam Address of the TextToSpeech instance to notify (the constructor passes `this`)
	\param lParam Not used
	*/
	void __stdcall SPNOTIFYCALLBACK(WPARAM wParam, LPARAM lParam)
	{
		TextToSpeech* const owner = reinterpret_cast<TextToSpeech*>(wParam);
		owner->OnTTSNotification();
	}

	/**
	\brief Initializes COM, creates the SAPI voice and subscribes to phoneme, viseme and bookmark events.

	If COM or the voice cannot be set up, m_Voice stays empty and the other methods return early.
	*/
	TextToSpeech::TextToSpeech()
	{
		// NOTE(review): nothing visible here balances this with CoUninitialize (the class declares no destructor) - confirm
		if (FAILED(CoInitialize(NULL)))
		{
			return; //#TODO log error
		}

		//Initialize SAPI
		// CoCreateInstance returns an already AddRef'ed raw pointer through an out-parameter.
		// std::shared_ptr cannot hand out its internal pointer for that, so receive it in a local first.
		ISpVoice* rawVoice = nullptr;
		const HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, reinterpret_cast<void**>(&rawVoice));
		if (FAILED(hr) || !rawVoice)
		{
			//#TODO log error
			return; // No voice: the calls below would dereference a null pointer
		}

		// COM objects are given up through Release(), never through delete
		m_Voice.reset(rawVoice, [](ISpVoice* voice) { voice->Release(); });

		// `this` travels through wParam and is cast back in SPNOTIFYCALLBACK
		// NOTE(review): this callback presumably only fires while the registering thread pumps window messages - confirm; Tick() polls regardless
		if (FAILED(m_Voice->SetNotifyCallbackFunction(SPNOTIFYCALLBACK, reinterpret_cast<WPARAM>(this), 0)))
		{
			//#TODO log error
		}

		// Same mask for "interested in" and "queued", Tick() reads the queued events via GetEvents
		const ULONGLONG eventMask = SPFEI(SPEI_PHONEME) | SPFEI(SPEI_VISEME) | SPFEI(SPEI_TTS_BOOKMARK);
		if (FAILED(m_Voice->SetInterest(eventMask, eventMask)))
		{
			//#TODO log error
		}
	}

	void TextToSpeech::SpeakXML(std::string_view stuff)
	{
	if (!m_Voice)
	return; // #TODO error print

	#TODO convert string from UTF-8 to UTF-16
	std::wstring speechStr;
	// Utf8ToWideChar(speechStr, stuff.Data());
	m_Voice->Speak( speechStr.Data(), SPF_ASYNC \| SPF_IS_XML \| SPF_PURGEBEFORESPEAK, NULL);
	m_IsSpeaking = true;
	}

	void TextToSpeech::SpeakSSML(std::string_view stuff)
	{
	if (!m_Voice)
	return; // #TODO error print

	#TODO convert string from UTF-8 to UTF-16
	std::wstring speechStr;
	//Utf8ToWideChar(speechStr, stuff.Data());
	m_Voice->Speak( speechStr.Data(), SPF_ASYNC \| SPF_PARSE_SSML \| SPF_PURGEBEFORESPEAK, NULL);
	m_IsSpeaking = true;
	}

	void TextToSpeech::Tick()
	{
	if (!m_Voice \|\| !m_IsSpeaking)
	return; // #TODO error print

	bool signalled = WaitForSingleObjectEx(m_Voice->SpeakCompleteEvent(), 0, true) != WAIT_TIMEOUT;
	if (signalled)
	{
	m_IsSpeaking = false;
	OnSpeechDone();
	}

	if (true) // m_HasPendingTTSNotifications
	{
	SPEVENT eventItem;
	memset( &eventItem, 0,sizeof(SPEVENT));
	while (m_Voice->GetEvents(1, &eventItem , NULL) == S_OK)
	{
	switch (eventItem.eEventId)
	{
	case SPEI_VISEME:
	{
	auto viseme = static_cast<SPVISEMES>(LOWORD(eventItem.lParam));

	OnMouthStateChanged(viseme);
	break;
	}
	case SPEI_TTS_BOOKMARK:
	{
	#TODO convert string from UTF-16 to UTF-8
	std::string bookmarkName;
	// WideCharToUtf8(bookmarkName, (wchar_t*)eventItem.lParam);

	OnBookmark(bookmarkName);
	break;
	}
	case SPEI_PHONEME:
	{
	OnPhoneme(HIWORD(eventItem.wParam), LOWORD(eventItem.lParam));
	}
	default:
	break;
	}

	SpClearEvent(&eventItem);
	}
	}
	}

	/**
	\brief Output stream that throws away everything written to it.

	SetAudioEnabled(false) sets it as the voice output. The object is not reference counted:
	AddRef() and Release() never destroy it, its lifetime is controlled by whoever created it.
	*/
	struct NullSPStream : ISpStreamFormat
	{
		HRESULT STDMETHODCALLTYPE QueryInterface(const IID& riid, void** ppvObject) override
		{
			if (!ppvObject)
				return E_POINTER;

			if (riid == __uuidof(IUnknown) || riid == __uuidof(ISpStreamFormat) || riid == __uuidof(IStream))
			{
				*ppvObject = this;
				AddRef();
				return S_OK;
			}

			*ppvObject = nullptr;
			return E_NOINTERFACE;
		}
		ULONG STDMETHODCALLTYPE AddRef() override
		{
			return 1; // Not reference counted, report a constant count
		}
		ULONG STDMETHODCALLTYPE Release() override
		{
			return 1; // Never deletes itself, see struct comment
		}
		HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead) override
		{
			if (pcbRead)
				*pcbRead = 0; // Nothing is ever stored, so nothing can be read

			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Write(const void* pv, ULONG cb, ULONG* pcbWritten) override
		{
			// The data is dropped, but the caller has to be told that all of it was consumed
			if (pcbWritten)
				*pcbWritten = cb;

			return S_OK;
		}
		HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin, ULARGE_INTEGER* plibNewPosition) override
		{
			// No position is tracked, the requested offset is reported back whatever the origin is
			if (plibNewPosition)
				plibNewPosition->QuadPart = dlibMove.QuadPart;

			return S_OK;
		}
		HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER libNewSize) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE CopyTo(IStream* pstm, ULARGE_INTEGER cb, ULARGE_INTEGER* pcbRead, ULARGE_INTEGER* pcbWritten) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Commit(DWORD grfCommitFlags) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Revert() override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Stat(STATSTG* pstatstg, DWORD grfStatFlag) override
		{
			return E_NOTIMPL;
		}
		HRESULT STDMETHODCALLTYPE Clone(IStream** ppstm) override
		{
			return E_NOTIMPL;
		}
		/**
		\brief Reports an 8 bit mono PCM format. The returned WAVEFORMATEX is allocated with CoTaskMemAlloc.
		*/
		HRESULT STDMETHODCALLTYPE GetFormat(GUID* pguidFormatId, WAVEFORMATEX** ppCoMemWaveFormatEx) override
		{
			if (!pguidFormatId || !ppCoMemWaveFormatEx)
				return E_POINTER;

			WAVEFORMATEX* const format = static_cast<WAVEFORMATEX*>(CoTaskMemAlloc(sizeof(WAVEFORMATEX)));
			if (!format)
			{
				*ppCoMemWaveFormatEx = nullptr;
				return E_OUTOFMEMORY;
			}

			format->wFormatTag = WAVE_FORMAT_PCM;
			format->nChannels = 1;
			format->nSamplesPerSec = 44100;
			// NOTE(review): for PCM this is normally nSamplesPerSec * nBlockAlign (44100) - value kept as it was, confirm
			format->nAvgBytesPerSec = 512;
			format->nBlockAlign = 1;
			format->wBitsPerSample = 8;
			format->cbSize = 0;

			*pguidFormatId = SPDFID_WaveFormatEx;
			*ppCoMemWaveFormatEx = format;
			return S_OK;
		}
	};

	void TextToSpeech::SetAudioEnabled(bool enabled)
	{
	if (!m_Voice)
	return; // #TODO error print

	if (enabled)
	m_Voice->SetOutput(nullptr, TRUE); // Default Audio device
	else
	{
	if (!m_NullStream)
	m_NullStream = new NullSPStream();

	auto res = m_Voice->SetOutput(m_NullStream, TRUE);
	__nop();
	}

	}

	void TextToSpeech::OnTTSNotification()
	{
	m_HasPendingTTSNotifications = true;
	}

	}