Last active
October 6, 2024 00:53
-
-
Save guest271314/5434de730617eda461b937591737ec33 to your computer and use it in GitHub Desktop.
MediaStreamTrackGenerator vs. AudioWorklet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
globalThis.Piper = class Piper {
  /**
   * Streams s16 PCM speech synthesized by piper from a Web extension iframe
   * into an AudioWorkletProcessor for playback via Web Audio API.
   *
   * @param {Object} [options]
   * @param {string} [options.text="Speech synthesis."] Text to synthesize,
   *   passed to piper as JSON.
   * @param {string} [options.voice="male"] "male" or "female", matching
   *   en_US-hfc_${voice}-medium.onnx.
   * @param {string|URL} [options.url] Extension iframe URL. When omitted,
   *   falls back to the page-scope `args` binding (extension-injected).
   */
  constructor({
    text,
    voice,
    url,
  } = {
    text: "Speech synthesis.",
    voice: "male",
  }) {
    // Encoded into Web extension iframe URL.
    this.params = new URLSearchParams(Object.entries({
      text,
      voice,
    }));
    // FIX: original referenced the free identifier `args` unconditionally,
    // which throws ReferenceError unless the extension injects that global.
    // Accept an explicit `url` option and only fall back to `args`.
    this.url = new URL(url ?? args);
    // Text parameter to piper as JSON.
    this.text = text;
    // Voice. Implemented: "male" or "female" matching en_US-hfc_${voice}-medium.onnx.
    this.voice = voice;
    // Verify bytes in arbitrary Web page is equal to bytes
    // written to WritableStreamDefaultWriter in extension injected iframe.
    this.bytes = 0;
    // Web Audio API.
    this.latencyHint = 0;
    this.channelCount = 1;
    this.numberOfInputs = 1;
    this.numberOfOutputs = 1;
    // 1 channel s16 PCM, interleaved.
    this.sampleRate = 22050;
    // AbortController to abort streams and audio playback.
    this.abortable = new AbortController();
    this.signal = this.abortable.signal;
    // Web Audio API BaseAudioContext.
    this.ac = new AudioContext({
      latencyHint: this.latencyHint,
      sampleRate: this.sampleRate,
    });
    // Verify AudioContext state is closed on abort or complete;
    // bytes in transferableStream.js, readOffset in AudioWorkletProcessor.
    this.ac.addEventListener("statechange", (e) => {
      console.log(`${e.target.constructor.name}.state ${e.target.state}`);
    }, {
      once: true,
    });
    this.msd = new MediaStreamAudioDestinationNode(this.ac, {
      channelCount: this.channelCount,
    });
    [this.track] = this.msd.stream.getAudioTracks();
    this.mediaStream = new MediaStream([this.track]);
    this.msn = new MediaStreamAudioSourceNode(this.ac, {
      mediaStream: this.mediaStream,
    });
    this.msn.connect(this.ac.destination);
  }
  // Remove iframe when done streaming, stream aborted, or error, exception.
  removeFrame() {
    document.querySelectorAll(`[src*="${this.url.origin}"]`)
      .forEach((iframe) => {
        document.body.removeChild(iframe);
      });
    this.transferableWindow = null;
  }
  /**
   * Abort streams and audio playback.
   * @param {string} [reason="Stream aborted."] Abort reason propagated
   *   through the AbortSignal.
   */
  abort(reason = "Stream aborted.") {
    this.abortable.abort(reason);
  }
  /**
   * Injects the extension iframe, receives a transferred ReadableStream of
   * s16 PCM via postMessage, and plays it through an AudioWorkletProcessor.
   * @returns {Promise<Object>} Resolves with worklet offsets and playout stats.
   */
  async stream() {
    // Web extension "web_accessible_resources" to communicate with iframe
    // from and to arbitrary Web pages using Transferable Streams.
    const {
      resolve,
      reject,
      promise,
    } = Promise.withResolvers();
    this.promise = promise;
    // FIX: original registered with {once: true}; a message from an
    // unrelated origin would consume the single invocation and the
    // transferred stream would never be received. Remove the listener only
    // after handling a message from the expected origin.
    const handleMessage = (event) => {
      if (event.origin === this.url.origin) {
        removeEventListener("message", handleMessage);
        // If event.data is ReadableStream pass ReadableStream
        // and function to remove iframe from Web page when stream completes.
        if (event.data instanceof ReadableStream) {
          resolve(event.data);
        } else {
          console.trace();
          reject(event.data);
        }
      }
    };
    addEventListener("message", handleMessage);
    this.transferableWindow = document.createElement("iframe");
    this.transferableWindow.style.display = "none";
    this.transferableWindow.name = location.href;
    // Encode text and voice in chrome-extension: URL.
    this.transferableWindow.src =
      `${this.url.href}?${this.params.toString()}`;
    document.body.appendChild(this.transferableWindow);
    this.readable = (await this.promise).pipeThrough(new TransformStream(), {
      signal: this.signal,
    });
    // FIX: original tested ((!this.readable) instanceof ReadableStream),
    // which is always false (boolean instanceof); negate the whole test.
    if (!(this.readable instanceof ReadableStream)) {
      return this.abort();
    }
    // AudioWorklet. Page-scope stub so the class expression below parses;
    // the real AudioWorkletProcessor exists in AudioWorkletGlobalScope,
    // where the stringified class is actually evaluated.
    class AudioWorkletProcessor {}
    class ResizableArrayBufferAudioWorkletStream
      extends AudioWorkletProcessor {
      constructor(_options) {
        super();
        this.readOffset = 0;
        this.writeOffset = 0;
        this.endOfStream = false;
        // Growable backing store for the PCM stream; capped at 4 MiB.
        this.ab = new ArrayBuffer(0, {
          maxByteLength: (1024 ** 2) * 4,
        });
        this.u8 = new Uint8Array(this.ab);
        this.port.onmessage = (e) => {
          this.readable = e.data;
          this.stream();
        };
      }
      // Convert the unsigned 16-bit reinterpretation of s16 PCM samples
      // to float32 in [-1, 1] written into the output channel.
      int16ToFloat32(u16, channel) {
        for (const [i, int] of u16.entries()) {
          const float = int >= 0x8000 ?
            -(0x10000 - int) / 0x8000 :
            int / 0x7fff;
          channel[i] = float;
        }
      }
      // Accumulate the transferred ReadableStream into the resizable
      // ArrayBuffer; on error, reset the buffer and report offsets.
      async stream() {
        try {
          for await (const u8 of this.readable) {
            const {
              length,
            } = u8;
            this.ab.resize(this.ab.byteLength + length);
            this.u8.set(u8, this.readOffset);
            this.readOffset += length;
          }
          // FIX: typo "strean" in original log message.
          console.log("Input stream closed.");
        } catch (e) {
          this.ab.resize(0);
          this.port.postMessage({
            currentTime,
            currentFrame,
            readOffset: this.readOffset,
            writeOffset: this.writeOffset,
            e,
          });
        }
      }
      process(_, [
        [output],
      ]) {
        // All received bytes have been written to output: signal end of
        // stream exactly once, then release the buffer.
        if (this.writeOffset > 0 && this.writeOffset >= this.readOffset) {
          if (this.endOfStream === false) {
            console.log("Output stream closed.");
            this.endOfStream = true;
            this.ab.resize(0);
            this.port.postMessage({
              currentTime,
              currentFrame,
              readOffset: this.readOffset,
              writeOffset: this.writeOffset,
            });
          }
        }
        // 256 bytes => 128 s16 samples: one render quantum per process().
        if (this.readOffset > 256 && this.writeOffset < this.readOffset) {
          if (this.writeOffset === 0) {
            console.log("Start output stream.");
          }
          const u8 = Uint8Array.from({
              length: 256,
            },
            () =>
              this.writeOffset > this.readOffset ?
              0 :
              this.u8[this.writeOffset++],
          );
          const u16 = new Uint16Array(u8.buffer);
          this.int16ToFloat32(u16, output);
        }
        return true;
      }
    }
    // Stringify and register processor in AudioWorkletGlobalScope; the
    // local name intentionally shadows the global registerProcessor since
    // the returned source is evaluated inside the worklet module.
    function registerProcessor(name, processorCtor) {
      return `console.log(globalThis);\n${processorCtor};\n
        registerProcessor('${name}', ${processorCtor.name});`
        .replace(/\s+/g, " ");
    }
    const worklet = URL.createObjectURL(
      new Blob([
        registerProcessor(
          "resizable-arraybuffer-audio-worklet-stream",
          ResizableArrayBufferAudioWorkletStream,
        ),
      ], {
        type: "text/javascript",
      }),
    );
    await this.ac.audioWorklet.addModule(
      worklet,
    );
    try {
      this.aw = new AudioWorkletNode(
        this.ac,
        "resizable-arraybuffer-audio-worklet-stream", {
          numberOfInputs: this.numberOfInputs,
          numberOfOutputs: this.numberOfOutputs,
          channelCount: this.channelCount,
        },
      );
    } catch (e) {
      console.log(e);
      throw e;
    }
    // Transfer ReadableStream to AudioWorkletProcessor scope.
    this.aw.port.postMessage(this.readable, [this.readable]);
    this.aw.connect(this.msd);
    this.aw.onprocessorerror = (e) => {
      console.error(e, "processorerror");
      console.trace();
    };
    const {
      resolve: result,
      promise: endOfStream,
    } = Promise.withResolvers();
    this.aw.port.onmessage = async (e) => {
      this.ac.addEventListener("statechange", (event) => {
        console.log(
          `${event.target.constructor.name}.state ${event.target.state}`,
        );
        result({
          ...e.data,
          ...this.ac.playoutStats.toJSON(),
        });
      }, {
        once: true,
      });
      await this.ac.close();
    };
    return endOfStream.finally(
      () => (this.removeFrame()),
    );
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
globalThis.Piper = class Piper {
  /**
   * Streams s16 PCM speech synthesized by piper from a Web extension iframe
   * and plays it back by writing WebCodecs AudioData to a
   * MediaStreamTrackGenerator (Media Capture Transform).
   *
   * @param {Object} [options]
   * @param {string} [options.text="Speech synthesis."] Text to synthesize,
   *   passed to piper as JSON.
   * @param {string} [options.voice="male"] "male" or "female", matching
   *   en_US-hfc_${voice}-medium.onnx.
   * @param {string|URL} [options.url] Extension iframe URL. When omitted,
   *   falls back to the page-scope `args` binding (extension-injected).
   */
  constructor({
    text,
    voice,
    url,
  } = {
    text: "Speech synthesis.",
    voice: "male",
  }) {
    // Encoded into Web extension iframe URL.
    this.params = new URLSearchParams(Object.entries({
      text,
      voice,
    }));
    // FIX: original referenced the free identifier `args` unconditionally,
    // which throws ReferenceError unless the extension injects that global.
    // Accept an explicit `url` option and only fall back to `args`.
    this.url = new URL(url ?? args);
    // Text parameter to piper as JSON.
    this.text = text;
    // Voice. Implemented: "male" or "female" matching en_US-hfc_${voice}-medium.onnx.
    this.voice = voice;
    // Verify bytes in arbitrary Web page is equal to bytes
    // written to WritableStreamDefaultWriter in extension injected iframe.
    this.bytes = 0;
    // Count extra bytes used for silence to avoid clipping at start, end of stream.
    this.extraBytes = 0;
    // Web Audio API.
    this.latencyHint = 0;
    this.frequency = 0;
    this.channelCount = 1;
    // Media Capture Transform MediaStreamTrackGenerator.
    this.kind = "audio";
    // 1 channel s16 PCM, interleaved.
    this.sampleRate = 22050;
    this.numberOfChannels = 1;
    // Frames per AudioData.
    this.numberOfFrames = 220;
    // Byte length of Uint8Array per AudioData (220 s16 frames * 2 bytes).
    this.byteLength = 440;
    // WebCodecs AudioData format.
    this.format = "s16";
    // ReadableStream byte stream.
    this.type = "bytes";
    this.timestamp = 0;
    // AbortController to abort streams and audio playback.
    this.abortable = new AbortController();
    this.signal = this.abortable.signal;
    // Readable byte stream.
    this.bytestream = new ReadableStream({
      type: this.type,
      start: (c) => {
        // Byte stream controller.
        return this.bytestreamController = c;
      },
    });
    // Readable byte stream BYOB reader.
    this.reader = new ReadableStreamBYOBReader(this.bytestream);
    // Web Audio API BaseAudioContext.
    this.ac = new AudioContext({
      latencyHint: this.latencyHint,
      sampleRate: this.sampleRate,
    });
    // Verify AudioContext state is closed on abort or complete;
    // bytes in transferableStream.js, readOffset in AudioWorkletProcessor.
    this.ac.addEventListener("statechange", (e) => {
      console.log(`${e.target.constructor.name}.state ${e.target.state}`);
    }, {
      once: true,
    });
    this.osc = new OscillatorNode(this.ac, {
      frequency: this.frequency,
      channelCount: this.channelCount,
    });
    this.msd = new MediaStreamAudioDestinationNode(this.ac, {
      channelCount: this.channelCount,
    });
    [this.track] = this.msd.stream.getAudioTracks();
    // Get timestamp from WebCodecs AudioData produced by silence stream
    // from OscillatorNode to MediaStreamAudioDestinationNode.
    this.processor = new MediaStreamTrackProcessor({
      track: this.track,
    });
    // Write "s16" (S16_LE) PCM as Uint8Array to MediaStreamTrackGenerator writable.
    this.generator = new MediaStreamTrackGenerator({
      kind: this.kind,
    });
    this.audioWriter = this.generator.writable.getWriter();
    this.mediaStream = new MediaStream([this.generator]);
    this.msn = new MediaStreamAudioSourceNode(this.ac, {
      mediaStream: this.mediaStream,
    });
  }
  // Remove iframe when done streaming, stream aborted, or error, exception.
  removeFrame() {
    document.querySelectorAll(`[src*="${this.url.origin}"]`)
      .forEach((iframe) => {
        document.body.removeChild(iframe);
      });
    this.transferableWindow = null;
  }
  /**
   * Abort streams and audio playback.
   * @param {string} [reason="Stream aborted."] Abort reason propagated
   *   through the AbortSignal.
   */
  abort(reason = "Stream aborted.") {
    this.abortable.abort(reason);
  }
  /**
   * Injects the extension iframe, receives a transferred ReadableStream of
   * s16 PCM via postMessage, and writes it as WebCodecs AudioData to the
   * MediaStreamTrackGenerator for playback.
   * @returns {Promise<Array>} Promise.allSettled() results carrying generator
   *   stats and byte counts.
   */
  async stream() {
    // Web extension "web_accessible_resources" to communicate with iframe
    // from and to arbitrary Web pages using Transferable Streams.
    const {
      resolve,
      reject,
      promise,
    } = Promise.withResolvers();
    this.osc.connect(this.msd);
    this.msn.connect(this.ac.destination);
    this.promise = promise;
    // FIX: original registered with {once: true}; a message from an
    // unrelated origin would consume the single invocation and the
    // transferred stream would never be received. Remove the listener only
    // after handling a message from the expected origin.
    const handleMessage = (event) => {
      if (event.origin === this.url.origin) {
        removeEventListener("message", handleMessage);
        // If event.data is ReadableStream pass ReadableStream
        // and function to remove iframe from Web page when stream completes.
        if (event.data instanceof ReadableStream) {
          resolve(event.data);
        } else {
          console.trace();
          reject(event.data);
        }
      }
    };
    addEventListener("message", handleMessage);
    this.transferableWindow = document.createElement("iframe");
    this.transferableWindow.style.display = "none";
    this.transferableWindow.name = location.href;
    // Encode text and voice in chrome-extension: URL.
    this.transferableWindow.src =
      `${this.url.href}?${this.params.toString()}`;
    document.body.appendChild(this.transferableWindow);
    this.readable = await this.promise;
    // FIX: original tested ((!this.readable) instanceof ReadableStream),
    // which is always false (boolean instanceof); negate the whole test.
    if (!(this.readable instanceof ReadableStream)) {
      return this.abort();
    }
    return await Promise.allSettled([
      this.readable.pipeTo(
        new WritableStream({
          write: (u8) => {
            this.bytes += u8.length;
            this.bytestreamController.enqueue(u8);
          },
          close: () => {
            this.bytestreamController.close();
            // (this.generator.stats.toJSON().totalFrames/2)-this.extraBytes
            console.log("Input stream closed.");
          },
          // Verify abort reason propagates.
          abort: async (reason) => {
            console.log({
              reason,
            });
            this.bytestreamController.close();
            await this.audioWriter.close();
          },
        }), {
          signal: this.signal,
        },
      ).then(() => this.generator.stats.toJSON()),
      this.processor.readable.pipeTo(
        new WritableStream({
          start: async () => {
            // Avoid clipping of initial MediaStreamTrack playback, with
            // silence before playback begins.
            let silence = new AudioData({
              sampleRate: this.sampleRate,
              numberOfChannels: this.numberOfChannels,
              numberOfFrames: this.numberOfFrames,
              format: this.format,
              timestamp: this.timestamp,
              data: new Uint8Array(this.byteLength),
            });
            this.timestamp += silence.duration;
            // console.log(silence.duration/10**6);
            await this.audioWriter.write(silence);
            // Count extra bytes used to insert silence at start, end of stream.
            this.extraBytes += this.byteLength * 2;
            console.log("Start output stream.");
          },
          write: async (audioData, c) => {
            // Get timestamp from AudioData stream of silence
            // from OscillatorNode connected to MediaStreamAudioDestinationNode
            // using MediaStreamTrackProcessor.
            // Manually incrementing timestamp with
            // basetime = 0; timestamp: basetime * 10**6;
            // basetime += audioData.duration;
            // accounting for latency, asynchronous processes, to create
            // WebCodecs AudioData timestamp for live MediaStreamTrack non-trivial.
            const {
              timestamp,
            } = audioData;
            let {
              value: data,
              done,
            } = await this.reader.read(
              new Uint8Array(this.byteLength), {
                min: this.byteLength,
              },
            );
            // Avoid clipping.
            // Fill last frames of AudioData with silence
            // when frames are less than 440.
            if (data?.length < this.byteLength) {
              this.extraBytes += this.byteLength - data.length;
              const u8 = new Uint8Array(this.byteLength);
              u8.set(data, 0);
              data = u8;
            }
            // console.log(audioWriter.desiredSize, done);
            if (done) {
              // Stop MediaStreamTrack of MediaStreamAudioDestinationNode
              // and close MediaStreamTrackGenerator WritableStreamDefaultWriter.
              // Delay track.stop() for 100 milliseconds to avoid clipping
              // end of audio playback.
              if (this.signal.aborted) {
                this.track.stop();
                return c.error(this.signal.reason);
              }
              await this.audioWriter.close();
              return await scheduler.postTask(() => this.track.stop(), {
                priority: "background",
                delay: 100,
              });
            }
            if (this.signal.aborted) {
              return;
            }
            await this.audioWriter.ready;
            const ad = new AudioData({
              sampleRate: this.sampleRate,
              numberOfChannels: this.numberOfChannels,
              // data.buffer.byteLength / 2,
              numberOfFrames: this.numberOfFrames,
              format: this.format,
              // NOTE(review): this.timestamp accumulates AudioData.duration,
              // which is already in microseconds, so scaling by 10 ** 6 here
              // looks inconsistent with the silence frame written in start()
              // — confirm intended units before changing; behavior preserved.
              timestamp: this.timestamp * 10 ** 6,
              data,
            });
            this.timestamp += ad.duration;
            // Write Uint8Array representation of 1 channel S16 PCM.
            await this.audioWriter.write(
              ad,
            ).catch((e) => {
              console.warn(e);
            });
          },
          close: () => {
            console.log("Output stream closed.");
            // this.track.stop();
            // Remove Web extension injected HTML iframe.
            // Used for messaging data from piper with Native Messaging protocol
            // to TransformStream where the readable side is transferred to
            // the Web page and read.
            this.removeFrame();
          },
          // Handle this.abortable.abort("reason");
          abort(reason) {
            console.log(reason);
          },
        }),
      ).then(() => ({
        bytes: this.bytes,
        extraBytes: this.extraBytes,
      })),
    ]).finally(() =>
      Promise.all([
        new Promise(async (resolve) => {
          this.ac.addEventListener("statechange", (event) => {
            console.log(
              `${event.target.constructor.name}.state ${event.target.state}`,
            );
            resolve();
          }, {
            once: true,
          });
          await this.ac.close();
        }),
        this.removeFrame(),
      ])
    );
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment