Skip to content

Instantly share code, notes, and snippets.

@kesor
Last active March 7, 2024 00:19
Show Gist options
  • Star 16 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save kesor/fc0d1a9b285011b74670109f22a59670 to your computer and use it in GitHub Desktop.
Making OpenAI's ChatGPT speak its replies aloud with text-to-speech (TTS)
// paste this into your chrome dev console for Speech Synthesis
// Intercept ChatGPT's own requests: the site POSTs the conversation text to a
// ".../moderations" endpoint, so the assistant's reply can be read out of the
// request body and spoken with the Web Speech API.
const originalFetch = window.fetch
const patchedFetch = (...args) => {
  const [url, options] = args
  // Guard with ?. — fetch(url) with no init object is a legal call and the
  // unguarded `args[1].method` crashed on it.
  if (options?.method === 'POST' && options.body?.length > 0 && /moderations$/.test(url)) {
    // The moderation input is "<prompt>\n\n\n<reply>\n\n\n..."; everything
    // after the first chunk is assistant output.
    const aiResponse = JSON.parse(options.body)["input"].split("\n\n\n")
    if (aiResponse.length > 1) {
      const text = aiResponse.slice(1).join(". ").trim()
      console.log(text)
      speechSynthesis.cancel()
      const utterance = new SpeechSynthesisUtterance(text)
      utterance.voice = speechSynthesis.getVoices().find(v => v.lang === "en-GB" && v.name.includes("Male"));
      utterance.rate = 1.3
      let state = 'unknown'
      utterance.addEventListener("start", (e) => {
        console.log("start", e)
        state = 'playing'
      })
      utterance.addEventListener("end", (e) => {
        console.log("end", e)
        state = 'done'
      })
      utterance.addEventListener("error", (e) => console.log("error", e))
      utterance.addEventListener("pause", (e) => console.log("pause", e))
      utterance.addEventListener("resume", (e) => console.log("resume", e))
      speechSynthesis.speak(utterance)
      // Chrome silently stops long utterances after ~15s; a periodic
      // pause/resume resets that timer. Bug fix: the original's unbraced
      // `if` still ran pause()/resume() on the tick that cleared the
      // interval — return early once playback is no longer active.
      const intVal = setInterval(() => {
        if (state !== 'playing') {
          clearInterval(intVal)
          return
        }
        speechSynthesis.pause()
        setTimeout(() => speechSynthesis.resume(), 50)
      }, 10 * 1000)
    }
  }
  return originalFetch(...args)
}
window.fetch = patchedFetch
// "shut-it!" link: immediately silences any speech in progress.
const buttonShutIt = document.createElement("a")
Object.assign(buttonShutIt.style, {
  border: '1px solid black',
  maxWidth: '10em',
  textAlign: 'center',
})
buttonShutIt.appendChild(document.createTextNode("shut-it!"))
// Place the link just above ChatGPT's prompt textarea.
document.getElementsByTagName("textarea")[0].parentElement.parentElement.prepend(buttonShutIt)
buttonShutIt.addEventListener("click", (e) => {
  e.preventDefault()
  speechSynthesis.cancel()
  return false
})
// paste this into your chrome dev console for Speech Recognition
// Dictate a prompt: recognized speech is written into ChatGPT's textarea and
// submitted by clicking the last button on the page (the send button).
const recognition = new (window.webkitSpeechRecognition || window.SpeechRecognition)()
recognition.lang = 'en-US'
// Bug fixes vs. the original: reference the hoisted function directly instead
// of via `this`, log each handler's own event argument rather than the
// deprecated global `event`, and label the error handler "error" (it logged
// "end").
recognition.addEventListener("result", recognitionResult)
recognition.addEventListener("end", (e) => console.log("end", e))
recognition.addEventListener("error", (e) => console.log("error", e))
function recognitionResult (event) {
  // Concatenate only the finalized segments of the transcript.
  let final = ""
  for (let i = 0; i < event.results.length; ++i) {
    if (event.results[i].isFinal) {
      final += event.results[i][0].transcript
    }
  }
  console.log(final)
  document.getElementsByTagName("textarea")[0].value = final
  // The send button is the last <button> on the page.
  const buttons = document.getElementsByTagName('button')
  buttons[buttons.length - 1].click()
}
// "listen" link: starts a speech-recognition session when clicked.
const buttonListen = document.createElement("a")
Object.assign(buttonListen.style, {
  border: '1px solid black',
  maxWidth: '10em',
  textAlign: 'center',
})
buttonListen.appendChild(document.createTextNode("listen"))
// Place the link just above ChatGPT's prompt textarea.
document.getElementsByTagName("textarea")[0].parentElement.parentElement.prepend(buttonListen)
buttonListen.addEventListener("click", (e) => {
  e.preventDefault()
  recognition.start()
  return false
})
// ==UserScript==
// @name ChatGPT Speak and Listen
// @namespace http://tampermonkey.net/
// @version 0.1
// @description  Add text-to-speech and speech-recognition controls to the ChatGPT web UI
// @author https://gist.github.com/kesor
// @downloadURL https://gist.github.com/kesor/fc0d1a9b285011b74670109f22a59670
// @match https://chat.openai.com/chat
// @grant unsafeWindow
// ==/UserScript==
/**
* You need the Chrome TamperMonkey extension for this --
* https://chrome.google.com/webstore/detail/tampermonkey/dhdgffkkebhmkfjojejmpbldmpobfkfo?hl=en
*/
(function() {
'use strict';
// Build a clickable <a> element, styled as a small button, showing `label`.
function createButton(label) {
  const anchor = document.createElement('a');
  anchor.href = "#";
  anchor.className = "btn btn-sm text-center p-1 border-1";
  anchor.innerHTML = label;
  return anchor;
}
// Build a status indicator showing `text`; starts hidden and is toggled
// via its style.visibility.
function createIndicator(text) {
  const indicator = document.createElement('p');
  indicator.className = 'btn btn-sm text-center p-1 border-1';
  indicator.style.visibility = 'hidden';
  indicator.innerHTML = text;
  return indicator;
}
// Build a pre-checked checkbox. Fix: the original accepted `label` but never
// used it — expose it as the tooltip and accessible name.
function createCheckbox(label) {
  const checkbox = document.createElement('input');
  checkbox.type = "checkbox";
  checkbox.className = 'btn btn-sm text-center p-1 border-1';
  checkbox.checked = true;
  checkbox.title = label;
  checkbox.setAttribute('aria-label', label);
  return checkbox;
}
// Create the UI controls (const instead of the original `var` — none of
// these bindings is ever reassigned).
const listenButton = createButton("Listen");
const listeningIndicator = createIndicator("Listening");
const speaksCheckbox = createCheckbox("It speaks!");
const shutItButton = createButton("Shut it!");
const buttonContainer = document.createElement('div')
buttonContainer.className = 'btn-group'
// Group the controls in one container...
buttonContainer.appendChild(listenButton);
buttonContainer.appendChild(listeningIndicator);
buttonContainer.appendChild(speaksCheckbox);
buttonContainer.appendChild(shutItButton);
// ...attached next to ChatGPT's prompt textarea.
document.getElementsByTagName("textarea")[0].parentElement.parentElement.appendChild(buttonContainer);
// "Shut it!" silences any speech currently playing.
shutItButton.addEventListener("click", (e) => {
  e.preventDefault();
  speechSynthesis.cancel();
  return false;
});
/** --- Speech Synthesis section --- */
// Look the preferred voice up at call time. Fix: Chrome populates
// getVoices() asynchronously, so the original's one-time lookup at script
// load frequently cached `undefined` forever.
function preferredVoice() {
  return speechSynthesis.getVoices().find(
    (v) => v.lang === "en-GB" && v.name.includes("Male")
  );
}
// Speak `text` aloud, replacing anything currently queued or playing.
function speakThis(text) {
  speechSynthesis.cancel();
  const utterance = new SpeechSynthesisUtterance(text);
  utterance.voice = preferredVoice();
  utterance.rate = 1.3;
  let state = "unknown";
  utterance.addEventListener("start", (e) => {
    console.log("start", e);
    state = "playing";
  });
  utterance.addEventListener("end", (e) => {
    console.log("end", e);
    state = "done";
  });
  speechSynthesis.speak(utterance);
  // Chrome silently stops long utterances after ~15s; a periodic
  // pause/resume resets that timer. Fix: return early once playback is
  // over — the original still ran pause()/resume() on the tick that
  // cleared the interval.
  const intVal = setInterval(() => {
    if (state !== "playing") {
      clearInterval(intVal);
      return;
    }
    speechSynthesis.pause();
    setTimeout(() => speechSynthesis.resume(), 50);
  }, 10 * 1000);
}
// overwrite window.fetch() so we can observe ChatGPT's own requests.
const originalFetch = unsafeWindow.fetch;
const patchedFetch = (...args) => {
  const [ url, options ] = args
  // ChatGPT POSTs the conversation text to ".../moderations"; everything
  // after the first "\n\n\n"-separated chunk is the assistant's reply.
  // Fix: guard with ?. — fetch(url) with no init object is a legal call
  // and crashed the original on `options.method`.
  if (speaksCheckbox.checked && options?.method === "POST" && options.body?.length > 0 && /moderations$/.test(url)) {
    const aiResponse = JSON.parse(options.body).input.split("\n\n\n");
    if (aiResponse.length > 1) {
      const text = aiResponse.slice(1).join(". ").trim();
      console.log(text);
      speakThis(text);
    }
  }
  // Always forward the request unchanged.
  return originalFetch(...args);
};
unsafeWindow.fetch = patchedFetch;
/** --- Speech Recognition section --- */
// Clicking "Listen" starts a fresh one-shot speech-recognition session and
// shows the "Listening" indicator until a result arrives.
listenButton.addEventListener("click", (e) => {
  e.preventDefault();
  const Recognition = window.webkitSpeechRecognition || window.SpeechRecognition;
  const recognition = new Recognition();
  recognition.lang = "en-US";
  recognition.addEventListener("result", recognitionResult);
  recognition.start();
  listeningIndicator.style.visibility = 'visible';
  return false;
});
// Handle the recognition result: gather the finalized transcript segments,
// drop them into ChatGPT's textarea, and press the send button (the last
// <button> on the page).
function recognitionResult(event) {
  const final = Array.from(event.results)
    .filter((result) => result.isFinal)
    .map((result) => result[0].transcript)
    .join("");
  listeningIndicator.style.visibility = 'hidden';
  document.getElementsByTagName("textarea")[0].value = final;
  const buttons = document.getElementsByTagName("button");
  buttons[buttons.length - 1].click();
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment