Skip to content

Instantly share code, notes, and snippets.

@Lachee
Last active May 9, 2023 06:03
Show Gist options
  • Save Lachee/46b0deee854c8b990242582f72edb0a2 to your computer and use it in GitHub Desktop.
Converts your speech into text, and then into speech again in mostly real time using the Web Speech API. (CHROME ONLY)
<html>
<head>
<!-- Shared page styling: every <div> (status, transcript, speech boxes)
     renders as a rounded, lightly shaded panel. -->
<style>
* { font-family: Arial, Helvetica, sans-serif;}
div {
background: #00000008;
margin: 10px;
min-height: 1em;
padding: 10px;
border-radius: 5px;
}
</style>
</head>
<body>
OpenTTS URL
<input type="url" id="api" name="api" value="http://localhost:5500/">
<!-- BUG FIX: "Conqui" → "Coqui" (the TTS engine's actual name) -->
Use Coqui (slow):
<input type="checkbox" id="coqui" checked="true" />
<button id="play">Begin</button><br>
<hr>
Status: <div id="status"></div>
<hr>
Recognised: <div id="text"></div>
Queue: <ul id="queue"></ul>
Speech: <div id="speech"></div>
<!-- BUG FIX: dropped the hard-coded blob: src — object URLs from a previous
     session are never valid after a reload, so the element always failed to
     load. The script assigns audio.src at runtime instead. -->
<audio id="audio" controls autoplay></audio>
</body>
<script>
// STT: https://www.google.com/intl/en/chrome/demos/speech.html
// TTS: https://github.com/mdn/dom-examples/tree/main/web-speech-api/speak-easy-synthesis
// Recognition language and OpenTTS voice selection.
const lang = 'en-AU';
const voice = 'larynx:southern_english_female-glow_tts'; // 'Google UK English Female';
const coquiVoice = 'coqui-tts:en_vctk';
const coquiSpeaker = 'p259';
// Cached DOM references for the UI elements defined in <body>.
const playButton = document.getElementById('play');
const resultBox = document.getElementById('text');
const recognitionStatusBox = document.getElementById('status');
const speechBox = document.getElementById('speech');
const queueList = document.getElementById('queue');
const coquiCheckbox = document.getElementById('coqui');
const audio = document.getElementById('audio');
const apiInput = document.getElementById('api');
// Mutable state: whether the playback loop / recognition session is active.
let isSpeaking = false;
let isListening = false;
let recognitionResults = '';
// webkitSpeechRecognition is Chrome-only, per the gist description.
let recognition = new webkitSpeechRecognition();
let synthDownloadQueue = [];
// NOTE(review): synthSpeakQueue appears unused in this script — candidate for removal.
let synthSpeakQueue = [];
// Continuous dictation with interim (partial) results, single best alternative.
recognition.lang = lang;
recognition.continuous = true;
recognition.interimResults = true;
recognition.maxAlternatives = 1;
// Fired when the recognition service begins listening to the microphone.
recognition.onstart = function() {
recognitionStatusBox.innerText = 'Recognition started. Waiting...';
};
// Surface recognition failures to the status box.
recognition.onerror = function(event) {
  if (event.error == 'no-speech') {
    recognitionStatusBox.innerText = 'No speech available';
  }
  if (event.error == 'audio-capture') {
    recognitionStatusBox.innerText = 'No microphone available';
  }
  if (event.error == 'not-allowed') {
    // BUG FIX: `start_timestamp` was copied from the original Google demo but
    // is never defined in this page, so this branch threw a ReferenceError.
    // Guard the lookup; with no start timestamp we report a generic denial.
    const started = typeof start_timestamp === 'number' ? start_timestamp : NaN;
    if (event.timeStamp - started < 100) {
      recognitionStatusBox.innerText = 'Web Speech API Blocked';
    } else {
      recognitionStatusBox.innerText = 'Web Speech API Denied';
    }
  }
};
recognition.onend = function (event) {
  recognitionStatusBox.innerText = 'Recognition ended.';
  if (!isListening) return;
  // Continuous recognition still stops periodically; restart while the user
  // has not pressed "End".
  console.warn('recognition has ended early', event);
  recognition.start();
};
recognition.onresult = function (event) {
  // Walk the new results: final transcripts are queued for TTS, interim
  // (in-progress) transcripts are shown in the status box.
  let interim = '';
  for (let i = event.resultIndex; i < event.results.length; ++i) {
    const result = event.results[i];
    if (!result.isFinal) {
      interim += result[0].transcript;
      continue;
    }
    recognitionResults = result[0].transcript;
    console.log('speaking', result);
    if (result.length > 1) alert('DEBUG: ANOTHER RESULT');
    speak(recognitionResults);
  }
  resultBox.innerHTML = linebreak(recognitionResults);
  recognitionStatusBox.innerHTML = linebreak(interim);
}
function stopListening() {
  // Flip the flag first so onend does not auto-restart recognition.
  isListening = false;
  console.log('stopped listening');
  recognition.stop();
  playButton.innerText = 'Begin';
}
function startListen() {
  // Restart cleanly if a previous session is still running.
  if (isListening) stopListening();
  isListening = true;
  recognition.start();
  resultBox.innerHTML = '';
  playButton.innerText = 'End';
}
/**
 * Queues `words` for synthesis via the OpenTTS server and kicks the playback
 * loop if it is idle. Returns false for empty input, otherwise undefined.
 */
function speak(words) {
  if (words === "")
    return false;
  /**
   * Downloads the synthesised audio for `words` and resolves with an object
   * URL for the resulting blob. Retries up to `attempts` times (250ms apart)
   * on network errors or non-2xx responses; resolves with null once all
   * attempts are exhausted so the playback loop can skip the clip.
   */
  const download = async (words, attempts = 3) => {
    try {
      const url = new URL('/api/tts', apiInput.value);
      if (coquiCheckbox.checked) {
        url.searchParams.append('voice', coquiVoice);
        url.searchParams.append('speakerId', coquiSpeaker);
      } else {
        url.searchParams.append('voice', voice);
      }
      url.searchParams.append('lang', 'en');
      url.searchParams.append('text', words);
      url.searchParams.append('vocoder', 'medium'); // quality
      url.searchParams.append('ssml', false); // SSML support
      console.log('requesting ', url.toString());
      const response = await fetch(url);
      if (!response.ok) {
        // BUG FIX: a non-2xx response used to resolve with undefined
        // immediately (no retry); treat it as a retryable failure instead.
        throw new Error(`failed to synth: ${response.status}`);
      }
      const blob = await response.blob();
      return URL.createObjectURL(blob);
    } catch (e) {
      if (attempts <= 0) {
        console.error('DOWNLOAD ABORTED', e);
        // BUG FIX: resolve with an explicit null instead of an implicit
        // undefined so the consumer can detect the failure.
        return null;
      }
      console.warn('failed to download clip, trying again in some time', e);
      return new Promise((resolve) => {
        setTimeout(() => download(words, attempts - 1).then(r => resolve(r)), 250);
      });
    }
  }
  // Start the download immediately; playback awaits the promise in order.
  synthDownloadQueue.push({ words, synth: download(words) });
  updateDownloadQueue();
  if (!isSpeaking) {
    startSpeaking();
  }
}
/**
 * Drains the download queue, playing each synthesised clip in order on the
 * shared <audio> element. Only one instance runs at a time (isSpeaking).
 */
async function startSpeaking() {
  // Resolves when playback ends OR errors — a failed load must not stall the
  // queue forever.
  const play = (src) => new Promise((resolve) => {
    audio.src = src;
    audio.onended = resolve;
    audio.onerror = resolve; // BUG FIX: previously a bad clip hung the queue
    audio.play();
  });
  isSpeaking = true;
  try {
    while (synthDownloadQueue.length > 0) {
      const download = synthDownloadQueue.shift();
      updateDownloadQueue();
      speechBox.innerText = download.words + "... (downloading)";
      const url = await download.synth;
      speechBox.innerText = download.words;
      // BUG FIX: skip clips whose download ultimately failed instead of
      // assigning "undefined" to audio.src.
      if (url) {
        console.log('playing', url);
        await play(url);
      }
      speechBox.innerText = '';
    }
  } finally {
    // BUG FIX: reset even if a download promise rejects, otherwise isSpeaking
    // stays true and speak() never restarts the queue.
    isSpeaking = false;
  }
}
function updateDownloadQueue() {
  // Render the pending clips, most recently queued first.
  const rows = synthDownloadQueue.map((d) => `<li><div>${d.words}</div></li>`);
  queueList.innerHTML = rows.reverse().join('');
}
// The single Begin/End button toggles the recognition session.
playButton.addEventListener('click', () => {
  isListening ? stopListening() : startListen();
});
// Upper-cases the first non-whitespace character of `s`.
function capitalize(s) {
  return s.replace(/\S/, (match) => match.toUpperCase());
}
// Converts blank-line paragraph breaks to <p></p> and remaining single
// newlines to <br> for innerHTML rendering.
function linebreak(s) {
  return s.replace(/\n\n/g, '<p></p>').replace(/\n/g, '<br>');
}
// Hook for auto-starting recognition on load; left disabled because the
// browser requires a user gesture before granting microphone access.
document.addEventListener('DOMContentLoaded', () => {
//startListen();
})
</script>
</html>
<html>
<head>
<!-- Shared page styling: every <div> (status, transcript, speech boxes)
     renders as a rounded, lightly shaded panel. -->
<style>
* { font-family: Arial, Helvetica, sans-serif;}
div {
background: #00000008;
margin: 10px;
min-height: 1em;
padding: 10px;
border-radius: 5px;
}
</style>
</head>
<body>
<div>
<input type="range" id="pitch" name="pitch" min="0" max="2" value="1" step="0.1">
<!-- BUG FIX: label was for="volume", which matches no element on this page -->
<label for="pitch">Pitch</label>
</div>
<div>
<input type="range" id="rate" name="rate" min="0" max="2" value="1" step="0.1">
<!-- BUG FIX: label was for="cowbell" (leftover from the MDN demo) -->
<label for="rate">Rate</label>
</div>
<button id="play">Begin</button><br>
<hr>
Status: <div id="status"></div>
<hr>
Recognised: <div id="text"></div>
Queue: <ul id="queue"></ul>
Speech: <div id="speech"></div>
</body>
<script>
// STT: https://www.google.com/intl/en/chrome/demos/speech.html
// TTS: https://github.com/mdn/dom-examples/tree/main/web-speech-api/speak-easy-synthesis
// Recognition language and the local SpeechSynthesis voice to prefer.
const lang = 'en-US';
const voice = 'Microsoft Catherine - English (Australia)'; // 'Google UK English Female';
const synth = window.speechSynthesis;
// Cached DOM references for the UI elements defined in <body>.
const playButton = document.getElementById('play');
const resultBox = document.getElementById('text');
const recognitionStatusBox = document.getElementById('status');
const speechBox = document.getElementById('speech');
const queueList = document.getElementById('queue');
const pitchRange = document.getElementById('pitch');
const rateRange = document.getElementById('rate');
// Populated asynchronously by populateVoices() via 'voiceschanged'.
let voices = [];
// Caches the available synthesis voices, sorted alphabetically by name
// (case-insensitive, via upper-cased comparison as before).
function populateVoices() {
  voices = synth.getVoices().sort((a, b) => {
    const left = a.name.toUpperCase();
    const right = b.name.toUpperCase();
    if (left === right) return 0;
    return left < right ? -1 : +1;
  });
  console.log('voices', voices);
}
// Voices load asynchronously in Chrome; refresh the cache when they arrive.
synth.addEventListener('voiceschanged', () => populateVoices());
// Mutable state: whether the synth loop / recognition session is active.
let isSynthing = false;
let isListening = false;
let recognitionResults = '';
// webkitSpeechRecognition is Chrome-only, per the gist description.
let recognition = new webkitSpeechRecognition();
let synthQueue = [];
// Continuous dictation with interim (partial) results, single best alternative.
recognition.lang = lang;
recognition.continuous = true;
recognition.interimResults = true;
recognition.maxAlternatives = 1;
// Fired when the recognition service begins listening to the microphone.
recognition.onstart = function() {
recognitionStatusBox.innerText = 'Recognition started. Waiting...';
};
// Surface recognition failures to the status box.
recognition.onerror = function(event) {
  if (event.error == 'no-speech') {
    recognitionStatusBox.innerText = 'No speech available';
  }
  if (event.error == 'audio-capture') {
    recognitionStatusBox.innerText = 'No microphone available';
  }
  if (event.error == 'not-allowed') {
    // BUG FIX: `start_timestamp` was copied from the original Google demo but
    // is never defined in this page, so this branch threw a ReferenceError.
    // Guard the lookup; with no start timestamp we report a generic denial.
    const started = typeof start_timestamp === 'number' ? start_timestamp : NaN;
    if (event.timeStamp - started < 100) {
      recognitionStatusBox.innerText = 'Web Speech API Blocked';
    } else {
      recognitionStatusBox.innerText = 'Web Speech API Denied';
    }
  }
};
recognition.onend = function (event) {
  recognitionStatusBox.innerText = 'Recognition ended.';
  if (!isListening) return;
  // Continuous recognition still stops periodically; restart while the user
  // has not pressed "End".
  console.warn('recognition has ended early', event);
  recognition.start();
};
recognition.onresult = function (event) {
  // Walk the new results: final transcripts are queued for TTS, interim
  // (in-progress) transcripts are shown in the status box.
  let interim = '';
  for (let i = event.resultIndex; i < event.results.length; ++i) {
    const result = event.results[i];
    if (!result.isFinal) {
      interim += result[0].transcript;
      continue;
    }
    recognitionResults = result[0].transcript;
    console.log('speaking', result);
    if (result.length > 1) alert('DEBUG: ANOTHER RESULT');
    speak(recognitionResults);
  }
  resultBox.innerHTML = linebreak(recognitionResults);
  recognitionStatusBox.innerHTML = linebreak(interim);
}
function stopListening() {
  // Flip the flag first so onend does not auto-restart recognition.
  isListening = false;
  console.log('stopped listening');
  recognition.stop();
  playButton.innerText = 'Begin';
}
function startListen() {
  // Restart cleanly if a previous session is still running.
  if (isListening) stopListening();
  isListening = true;
  recognition.start();
  resultBox.innerHTML = '';
  playButton.innerText = 'End';
}
// Queues `words` for synthesis and kicks the synth worker if it is idle.
// Returns false for empty input, otherwise undefined.
function speak(words) {
  if (words === "") return false;
  synthQueue.push(words);
  updateQueue();
  if (!isSynthing) startSynthQueue();
}
/**
 * Drains synthQueue, speaking each entry in order via the Web Speech API.
 * Only one instance runs at a time (isSynthing).
 */
async function startSynthQueue() {
  // Speaks `words`; resolves on completion, rejects with the error event.
  const say = (words) => new Promise((resolve, reject) => {
    const utterThis = new SpeechSynthesisUtterance(words);
    utterThis.onend = function (event) {
      console.log("SpeechSynthesisUtterance.onend");
      recognitionStatusBox.innerText = "Synth Ended";
      resolve(words);
    };
    utterThis.onerror = function (event) {
      console.error("SpeechSynthesisUtterance.onerror", event);
      recognitionStatusBox.innerText = "Synth Errored";
      reject(event);
    };
    // 'voiceschanged' may not have fired yet; populate lazily.
    if (voices.length == 0)
      populateVoices();
    for (let i = 0; i < voices.length; i++) {
      if (voices[i].name === voice) {
        utterThis.voice = voices[i];
        break;
      }
    }
    utterThis.pitch = pitchRange.value;
    utterThis.rate = rateRange.value;
    synth.speak(utterThis);
    speechBox.innerText = words;
  });
  isSynthing = true;
  try {
    while (synthQueue.length > 0) {
      const words = synthQueue.shift();
      updateQueue();
      try {
        await say(words);
      } catch (e) {
        // BUG FIX: a synthesis error used to reject out of the loop and leave
        // isSynthing stuck at true, permanently blocking speak(). Skip the
        // failed utterance and keep draining the queue.
        console.warn('skipping utterance after synth error', e);
      }
    }
  } finally {
    isSynthing = false;
  }
}
function updateQueue() {
  // Show pending utterances, most recently queued first.
  const rows = synthQueue.map((w) => `<li><div>${w}</div></li>`);
  queueList.innerHTML = rows.reverse().join('');
}
// The single Begin/End button toggles the recognition session.
playButton.addEventListener('click', () => {
  isListening ? stopListening() : startListen();
});
// Upper-cases the first non-whitespace character of `s`.
function capitalize(s) {
  return s.replace(/\S/, (match) => match.toUpperCase());
}
// Converts blank-line paragraph breaks to <p></p> and remaining single
// newlines to <br> for innerHTML rendering.
function linebreak(s) {
  return s.replace(/\n\n/g, '<p></p>').replace(/\n/g, '<br>');
}
// Hook for auto-starting recognition on load; left disabled because the
// browser requires a user gesture before granting microphone access.
document.addEventListener('DOMContentLoaded', () => {
//startListen();
})
</script>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment