Skip to content

Instantly share code, notes, and snippets.

@mbrock
Created October 26, 2023 11:03
Show Gist options
  • Save mbrock/51a853f1375c9f6d6c42b1c413d3232f to your computer and use it in GitHub Desktop.
Save mbrock/51a853f1375c9f6d6c42b1c413d3232f to your computer and use it in GitHub Desktop.
swa.sh
// swa.sh - a tool, for naught
// Copyright (C) 2023 Mikael Brockman
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/**
 * Build a random six-character word.
 *
 * One 32-bit value is drawn from `window.crypto.getRandomValues` and its top
 * 30 bits are split into six 5-bit groups (most significant first); each
 * group selects one character from a 32-character alphabet. The lowest two
 * bits are discarded.
 *
 * @returns {string} six characters from the alphabet below
 */
function zb32word() {
  const alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769"
  const sample = new Int32Array(1)
  window.crypto.getRandomValues(sample)
  const bits = sample[0]

  let word = ""
  // Shifts 27, 22, 17, 12, 7, 2 walk the six 5-bit groups from the top down.
  for (let shift = 27; shift >= 2; shift -= 5) {
    word += alphabet[(bits >>> shift) & 0x1f]
  }
  return word
}
/**
 * Generate a fresh identifier made of two random words from `zb32word`,
 * joined with no separator.
 *
 * @returns {string} the concatenation of two `zb32word()` results
 */
function gensym() {
  const head = zb32word()
  const tail = zb32word()
  return head + tail
}
/**
 * Adapts a push-style producer to the async-iterator protocol.
 *
 * The `setup` callback receives `{ next, stop, fail }`:
 *   - `next(value)` delivers a value to the consumer, buffering it when no
 *     `next()` call is currently waiting;
 *   - `stop()` ends the stream once buffered values have been consumed;
 *   - `fail(error)` makes the stream reject once buffered values have been
 *     consumed.
 *
 * After `stop`, `fail`, `return` or `throw` the stream is terminal: further
 * values pushed by the producer are ignored.
 */
class Stream {
  /**
   * @param {(controls: {next: Function, stop: Function, fail: Function}) => void} setup
   *   invoked synchronously with the producer-side controls
   */
  constructor(setup) {
    this.buffer = []
    this.promise = null
    this.resolve = null
    this.reject = null
    this.finished = false
    // Wrapped as `{ error }` so that a falsy error value still counts.
    this.failure = null

    const next = value => {
      if (this.finished || this.failure) {
        return
      }
      const pending = this.takePending()
      if (pending) {
        pending.resolve({ value, done: false })
      } else {
        this.buffer.push(value)
      }
    }
    const stop = () => this.finish()
    const fail = error => this.abort(error)

    setup({ next, stop, fail })
  }

  /**
   * Detach and return the settle functions of the waiting `next()` promise,
   * or null when nobody is waiting.
   */
  takePending() {
    if (!this.promise) {
      return null
    }
    const pending = { resolve: this.resolve, reject: this.reject }
    this.promise = null
    this.resolve = null
    this.reject = null
    return pending
  }

  /** Mark the stream as ended and release a waiting consumer, if any. */
  finish() {
    if (this.finished || this.failure) {
      return
    }
    this.finished = true
    const pending = this.takePending()
    if (pending) {
      pending.resolve({ value: undefined, done: true })
    }
  }

  /** Mark the stream as failed and reject a waiting consumer, if any. */
  abort(error) {
    if (this.finished || this.failure) {
      return
    }
    this.failure = { error }
    const pending = this.takePending()
    if (pending) {
      pending.reject(error)
    }
  }

  /**
   * Pull the next result. Buffered values are delivered first; after that a
   * failed stream rejects, an ended stream reports `done`, and an open
   * stream waits for the producer.
   *
   * @returns {Promise<{value: *, done: boolean}>}
   */
  async next() {
    if (this.buffer.length > 0) {
      return { value: this.buffer.shift(), done: false }
    }
    if (this.failure) {
      throw this.failure.error
    }
    if (this.finished) {
      return { value: undefined, done: true }
    }
    if (!this.promise) {
      this.promise = new Promise((resolve, reject) => {
        this.resolve = resolve
        this.reject = reject
      })
    }
    return this.promise
  }

  /**
   * Called when the consumer abandons iteration (e.g. `break` inside
   * `for await`). Unread buffered values are dropped and the stream ends.
   *
   * @returns {Promise<{value: *, done: true}>}
   */
  return(value) {
    this.buffer.length = 0
    this.finish()
    return Promise.resolve({ value, done: true })
  }

  /**
   * Put the stream into the failed state from the consumer side. A waiting
   * `next()` rejects with `error`, as do later `next()` calls. The returned
   * promise resolves (rather than rejects) so that callers which ignore it
   * do not trigger an unhandled rejection.
   *
   * @returns {Promise<{value: undefined, done: true}>}
   */
  throw(error) {
    this.abort(error)
    return Promise.resolve({ value: undefined, done: true })
  }

  [Symbol.asyncIterator]() {
    return this
  }

  /**
   * Interleave several async iterators, yielding each value as soon as its
   * source produces it. Ends when every source has ended; rejects if any
   * source rejects.
   *
   * @param {Array} iterators objects with a promise-returning `next()`
   */
  static async *merge(iterators) {
    const sources = Array.from(iterators)
    // Keyed by source index so that removing a finished source never shifts
    // the slot of another one.
    const pending = new Map()
    const pull = source => {
      pending.set(
        source,
        sources[source].next().then(result => ({ result, source }))
      )
    }
    sources.forEach((_, source) => pull(source))

    while (pending.size > 0) {
      const { result, source } = await Promise.race(pending.values())
      if (result.done) {
        pending.delete(source)
      } else {
        yield result.value
        // Only ask the source for more once the consumer has resumed us.
        pull(source)
      }
    }
  }
}
/**
 * Base class for custom elements that render into an open shadow root.
 * The HTML passed to the constructor is stamped into the shadow root once,
 * at construction time.
 */
class BaseComponent extends HTMLElement {
  /**
   * @param {string} templateContent HTML markup for the shadow root
   */
  constructor(templateContent) {
    super()
    this.attachShadow({ mode: "open" })
    this.appendTemplate(templateContent)
  }

  /** First element in the shadow root matching `selector`, or null. */
  $(selector) {
    const { shadowRoot } = this
    return shadowRoot.querySelector(selector)
  }

  /** All elements in the shadow root matching `selector`. */
  $$(selector) {
    const { shadowRoot } = this
    return shadowRoot.querySelectorAll(selector)
  }

  /**
   * Parse `templateContent` through a <template> element and append a deep
   * copy of the result to the shadow root.
   */
  appendTemplate(templateContent) {
    const holder = document.createElement("template")
    holder.innerHTML = templateContent
    const stamped = holder.content.cloneNode(true)
    this.shadowRoot.appendChild(stamped)
  }

  /**
   * Create an element with the given attributes and children.
   *
   * @param {string} tagName
   * @param {Object} attributes attribute name -> value, set via setAttribute
   * @param {Array<string|HTMLElement>} children strings become text nodes
   * @returns {HTMLElement} the new, detached element
   * @throws {Error} "Invalid child type" for any other kind of child
   */
  tag(tagName, attributes = {}, children = []) {
    const node = document.createElement(tagName)

    for (const name of Object.keys(attributes)) {
      node.setAttribute(name, attributes[name])
    }

    children.forEach(child => {
      const isText = typeof child === "string"
      if (!isText && !(child instanceof HTMLElement)) {
        throw new Error("Invalid child type")
      }
      node.appendChild(isText ? document.createTextNode(child) : child)
    })

    return node
  }
}
/**
 * Start continuous browser speech recognition and expose what it reports as
 * a Stream of plain event objects:
 *
 *   - `{ type: "Result", timestamp }` once per recognition `result` event,
 *     followed by one `FinalTranscript` or `InterimTranscript` per changed
 *     result (`transcript`, `grade`, `timestamp`, `id`);
 *   - `{ type: "NoSpeech", timestamp }` for the "no-speech" error;
 *   - `{ type: "NetworkDown", timestamp }` for the "network" error.
 *
 * Any other recognition error fails the stream with the error event.
 * Recognition is restarted whenever it ends, except after such a fatal
 * error; restarting then would only reproduce the error in a loop.
 *
 * @param {Object} [options]
 * @param {string} [options.language="en-US"] value assigned to `recognition.lang`
 * @returns {Stream}
 */
function speechRecognitionEventStream({ language = "en-US" } = {}) {
  return new Stream(({ next, fail }) => {
    const Recognition =
      window.SpeechRecognition || window.webkitSpeechRecognition
    const recognition = new Recognition()
    // Set once an unrecoverable error has been passed to `fail`.
    let fatal = false

    recognition.interimResults = true
    recognition.continuous = true
    recognition.lang = language

    recognition.onresult = event => {
      const timestamp = new Date().toISOString()
      next({ type: "Result", timestamp })
      // Only results from `resultIndex` onwards changed in this event.
      Array.from(event.results)
        .slice(event.resultIndex)
        .forEach(result => {
          next({
            type: result.isFinal ? "FinalTranscript" : "InterimTranscript",
            transcript: result[0].transcript,
            grade: result.isFinal
              ? confidenceGrade(result[0].confidence)
              : undefined,
            timestamp,
            id: gensym(),
          })
        })
    }

    recognition.onerror = error => {
      const timestamp = new Date().toISOString()
      if (error.error === "no-speech") {
        next({ type: "NoSpeech", timestamp })
      } else if (error.error === "network") {
        next({ type: "NetworkDown", timestamp })
      } else {
        fatal = true
        fail(error)
      }
    }

    recognition.onend = () => {
      if (!fatal) {
        recognition.start()
      }
    }

    recognition.start()
  })
}
/**
 * Thin wrapper around MediaRecorder that accumulates recorded chunks in
 * memory and hands them out as a single Blob.
 */
class AudioRecorder {
  constructor() {
    this.mediaRecorder = null
    this.chunks = []
    this.stream = null
    this.startTime = null
  }

  /**
   * Acquire the audio input and create the MediaRecorder. Does nothing when
   * a stream has already been acquired.
   */
  async setup() {
    if (!this.stream) {
      this.stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      this.mediaRecorder = new MediaRecorder(this.stream)
      this.mediaRecorder.ondataavailable = e => {
        this.chunks.push(e.data)
      }
    }
  }

  /**
   * Begin recording if the recorder is currently inactive, requesting a
   * chunk every 100 ms so that `dump()` can be used mid-recording.
   */
  async start() {
    await this.setup()
    if (this.mediaRecorder.state === "inactive") {
      this.mediaRecorder.start(100)
      this.startTime = Date.now()
    }
  }

  /**
   * @returns {Blob} everything recorded since the last `stop()`, without
   *   interrupting the recording
   */
  dump() {
    return new Blob(this.chunks, { type: "audio/webm; codecs=opus" })
  }

  /**
   * Stop recording and clear the chunk buffer.
   *
   * When there is no recorder yet, or it is already inactive, no `stop`
   * event would be delivered, so the promise resolves immediately with
   * whatever has been collected instead of throwing or never settling.
   *
   * @returns {Promise<Blob>} the audio collected up to the stop
   */
  stop() {
    return new Promise(resolve => {
      const finish = () => {
        const blob = this.dump()
        this.chunks = []
        resolve(blob)
      }
      if (!this.mediaRecorder || this.mediaRecorder.state === "inactive") {
        finish()
        return
      }
      this.mediaRecorder.onstop = finish
      this.mediaRecorder.stop()
    })
  }

  /**
   * Stop the current recording and immediately begin a new one.
   *
   * @returns {Promise<Blob>} the audio of the recording that was stopped
   */
  async restart() {
    console.info("restarting audio")
    const blob = await this.stop()
    await this.start()
    return blob
  }
}
/**
 * Upload an audio file to the OpenAI transcription endpoint using the
 * "whisper-1" model and the "verbose_json" response format.
 *
 * @param {Object} options
 * @param {Blob} options.file audio to transcribe; uploaded as "audio.webm"
 * @param {string} options.token API token, sent as a Bearer credential
 * @param {string} [options.language="en"] value of the `language` form field
 * @param {string} [options.prompt=""] value of the `prompt` form field
 * @returns {Promise<Object>} the parsed JSON response body
 * @throws {Error} when the response status is not OK; the response text is
 *   logged with console.error first
 */
async function transcribe({ file, token, language = "en", prompt = "" }) {
  const endpoint = "https://api.openai.com/v1/audio/transcriptions"

  const body = new FormData()
  body.append("file", file, "audio.webm")
  const fields = {
    model: "whisper-1",
    response_format: "verbose_json",
    prompt,
    language,
  }
  for (const [name, value] of Object.entries(fields)) {
    body.append(name, value)
  }

  const response = await fetch(endpoint, {
    method: "POST",
    body,
    headers: { Authorization: `Bearer ${token}` },
  })

  if (response.ok) {
    return response.json()
  }
  console.error(await response.text())
  throw new Error(`HTTP error! status: ${response.status}`)
}
/**
 * Return the value stored in localStorage under `key`, asking the user for
 * it with `prompt(message)` when nothing (or an empty string) is stored.
 *
 * A non-empty answer is saved for next time. A cancelled or empty prompt is
 * NOT saved: `setItem` would otherwise store the literal string "null",
 * which every later call would return as if it were a real value.
 *
 * @param {Object} options
 * @param {string} options.key localStorage key
 * @param {string} [options.message=key] text shown in the prompt
 * @returns {Promise<string|null>} the stored or entered value; null when
 *   the prompt was cancelled
 */
async function demand({ key, message = key }) {
  const stored = localStorage.getItem(key)
  if (stored) {
    return stored
  }
  const answer = prompt(message)
  if (answer) {
    localStorage.setItem(key, answer)
  }
  return answer
}
/**
 * A one-shot timeout that can be pushed back: every `reset()` cancels the
 * scheduled call and schedules `onTimeout` again, `timeoutDuration`
 * milliseconds from now.
 */
class ResettableTimer {
  /**
   * @param {number} timeoutDuration delay in milliseconds
   * @param {Function} onTimeout callback handed to setTimeout
   */
  constructor(timeoutDuration, onTimeout) {
    Object.assign(this, { timeoutDuration, onTimeout, timeoutId: null })
  }

  /** Arm the timer; identical to `reset()`. */
  start() {
    this.reset()
  }

  /** Cancel any scheduled call and schedule a fresh one. */
  reset() {
    const { onTimeout, timeoutDuration } = this
    clearTimeout(this.timeoutId)
    this.timeoutId = setTimeout(onTimeout, timeoutDuration)
  }

  /** Cancel any scheduled call and leave the timer disarmed. */
  stop() {
    clearTimeout(this.timeoutId)
    this.timeoutId = null
  }
}
/**
 * <swash-dictaphone> custom element.
 *
 * Shows a running transcript from browser speech recognition. Each final
 * transcript is first shown as recognised, then replaced by the text
 * returned from `transcribe` for the audio recorded so far. Events are
 * persisted in localStorage under the element's `db` attribute and replayed
 * when the element is connected.
 */
class SwashDictaphone extends BaseComponent {
  constructor() {
    super(`
<link rel="stylesheet" href="index.css">
<article>
<div class="final"><p></p></div>
<div class="interim"></div>
</article>
<audio controls></audio>
`)
  }

  /**
   * Replay saved events, start recording and recognition, then process
   * recognition events for as long as the stream produces them.
   */
  async connectedCallback() {
    this.db = this.getAttribute("db")
    this.loadAndHandleEvents()
    const language = this.getAttribute("lang") || "en-US"
    this.shortLanguage = language.split("-")[0]
    this.recognitionEventStream = speechRecognitionEventStream({
      language,
    })
    this.recorder = new AudioRecorder()
    await this.recorder.start()
    // Armed by interim transcripts (see handleEvent). When it fires, the
    // audio recording is restarted, a new paragraph is opened unless the
    // last one is still empty, and the timer re-arms itself.
    this.timer = new ResettableTimer(5000, async () => {
      await this.recorder.restart()
      if (!this.$(".final p:empty:last-child")) {
        this.$(".final").appendChild(this.tag("p"))
      }
      this.timer.reset()
    })
    for await (const event of this.recognitionEventStream) {
      console.log("ok", event)
      // Not awaited: a transcription in flight must not hold up later events.
      this.handleEvent(event, true)
    }
  }

  /** Re-render every event stored under `this.db` without saving it again. */
  loadAndHandleEvents() {
    const events = JSON.parse(localStorage.getItem(this.db) || "[]")
    events.forEach(event => this.handleEvent(event, false))
  }

  /** Append `event` to the JSON array stored under `this.db`. */
  saveEvent(event) {
    let events = JSON.parse(localStorage.getItem(this.db) || "[]")
    events = [...events, event]
    localStorage.setItem(this.db, JSON.stringify(events))
  }

  /**
   * Forget all saved events and clear the display, leaving a single empty
   * paragraph for the next transcript to land in.
   */
  reset() {
    localStorage.removeItem(this.db)
    const finalPane = this.$(".final")
    finalPane.innerHTML = ""
    finalPane.appendChild(this.tag("p"))
    this.$(".interim").textContent = ""
  }

  /**
   * The paragraph that new transcripts are appended to. One is created when
   * the final pane has no paragraph at all.
   */
  currentParagraph() {
    let paragraph = this.$(".final p:last-of-type")
    if (!paragraph) {
      paragraph = this.tag("p")
      this.$(".final").appendChild(paragraph)
    }
    return paragraph
  }

  /**
   * Apply one recognition event to the display.
   *
   * @param {Object} event an event object with a `type` field
   * @param {boolean} shouldSave true for live events (persisted, and final
   *   transcripts are sent for transcription); false when replaying
   */
  async handleEvent(event, shouldSave) {
    if (shouldSave) {
      this.saveEvent(event)
    }
    const eventTypeHandlers = {
      Result: async () => {
        this.$(".interim").textContent = ""
      },
      FinalTranscript: async event => {
        const commands = {
          "reset bro": () => this.reset(),
        }
        const phrase = event.transcript.trim().toLowerCase()
        // Own-property check: a transcript such as "constructor" must not
        // be mistaken for a command via Object.prototype.
        if (Object.prototype.hasOwnProperty.call(commands, phrase)) {
          await commands[phrase]()
          return
        }

        const recording = this.tag(
          "span",
          {
            "data-grade": event.grade,
            "data-id": event.id,
            "data-timestamp": event.timestamp,
            class: shouldSave ? "recording" : "",
          },
          [event.transcript]
        )
        const p = this.currentParagraph()
        p.appendChild(recording)

        if (shouldSave) {
          const target = this.tag("span", {
            class: "whisper transcription pending",
          })
          p.appendChild(target)
          try {
            const transcription = await transcribe({
              file: this.recorder.dump(),
              token: await demand({
                key: "openai-token",
                message: "Please enter your OpenAI API token",
              }),
              language: this.shortLanguage,
            })
            target.classList.remove("pending")
            target.classList.add("done")
            // remove all other transcriptions in the same paragraph
            for (const span of p.querySelectorAll(".whisper.transcription")) {
              if (span !== target) {
                span.remove()
              }
            }
            recording.remove()
            console.info(transcription)
            target.textContent = transcription.text
          } catch (error) {
            // Keep the recognised text on screen and stop showing the
            // placeholder as pending.
            console.error(error)
            target.classList.remove("pending")
            target.classList.add("failed")
          }
        }
        this.$(".interim").textContent = ""
      },
      InterimTranscript: async event => {
        this.$(".interim").textContent += event.transcript
        if (shouldSave) {
          this.timer.reset()
        }
      },
      // Saved like any other event, but nothing is displayed for it.
      NoSpeech: async () => {},
    }
    const handlerFunc = eventTypeHandlers[event.type]
    if (handlerFunc) {
      await handlerFunc(event)
    }
    // scroll to bottom smoothly, centering the last line
    this.$(".final > :last-child, .interim").scrollIntoView({
      behavior: "smooth",
      block: "center",
    })
  }
}
// Register SwashDictaphone as the implementation of the <swash-dictaphone>
// custom element.
customElements.define("swash-dictaphone", SwashDictaphone)
/**
 * Map a recognition confidence score to a letter grade.
 *
 * A grade is awarded when the score is strictly greater than its threshold;
 * anything that clears no threshold (including NaN and undefined) is "F".
 *
 * @param {number} confidence
 * @returns {string} one of "A+", "A", "B", "C", "D", "F"
 */
function confidenceGrade(confidence) {
  // Ordered from the highest threshold down; the first one exceeded wins.
  const scale = [
    [0.95, "A+"],
    [0.9, "A"],
    [0.8, "B"],
    [0.7, "C"],
    [0.6, "D"],
  ]
  const awarded = scale.find(([threshold]) => confidence > threshold)
  return awarded ? awarded[1] : "F"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment