MediaStreamTrackGenerator vs. AudioWorklet
globalThis.Piper = class Piper {
constructor({
text,
voice
} = {
text: "Speech synthesis.",
voice: "male",
}) {
// Encoded into Web extension iframe URL
this.params = new URLSearchParams(Object.entries({
text,
voice
}));
// "args" (the extension's web-accessible resource URL) is expected to be
// defined in the enclosing scope before this class is constructed.
this.url = new URL(args);
// Text parameter to piper as JSON
this.text = text;
// Voice. Implemented: "male" or "female" matching en_US-hfc_${voice}-medium.onnx
this.voice = voice;
// Verify the bytes received in the arbitrary Web page are equal to the bytes
// written to the WritableStreamDefaultWriter in the extension-injected iframe.
this.bytes = 0;
// Web Audio API
this.latencyHint = 0;
this.channelCount = 1;
this.numberOfInputs = 1;
this.numberOfOutputs = 1;
// 1 channel s16 PCM, interleaved
this.sampleRate = 22050;
// AbortController to abort streams and audio playback
this.abortable = new AbortController();
this.signal = this.abortable.signal;
// Web Audio API BaseAudioContext
this.ac = new AudioContext({
latencyHint: this.latencyHint,
sampleRate: this.sampleRate,
});
// Verify the AudioContext state is "closed" on abort or completion; compare
// bytes in transferableStream.js with readOffset in the AudioWorkletProcessor.
this.ac.addEventListener("statechange", (e) => {
console.log(`${e.target.constructor.name}.state ${e.target.state}`);
}, {
once: true
});
this.msd = new MediaStreamAudioDestinationNode(this.ac, {
channelCount: this.channelCount,
});
[this.track] = this.msd.stream.getAudioTracks();
this.mediaStream = new MediaStream([this.track]);
this.msn = new MediaStreamAudioSourceNode(this.ac, {
mediaStream: this.mediaStream,
});
this.msn.connect(this.ac.destination);
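// Playback graph once stream() runs: AudioWorkletNode ->
// MediaStreamAudioDestinationNode -> MediaStream ->
// MediaStreamAudioSourceNode -> AudioContext.destination.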
}
// Remove the iframe when streaming completes, the stream is aborted, or an
// error or exception occurs.
removeFrame() {
document.querySelectorAll(`[src*="${this.url.origin}"]`)
.forEach((iframe) => {
document.body.removeChild(iframe);
});
this.transferableWindow = null;
}
abort(reason = "Stream aborted.") {
this.abortable.abort(reason);
}
async stream() {
// Use a Web extension "web_accessible_resources" iframe to communicate with
// arbitrary Web pages, in both directions, using Transferable Streams.
const {
resolve,
reject,
promise
} = Promise.withResolvers();
this.promise = promise;
const handleMessage = (event) => {
if (event.origin === this.url.origin) {
// If event.data is ReadableStream pass ReadableStream
// and function to remove iframe from Web page when stream completes
if (event.data instanceof ReadableStream) {
resolve(event.data);
} else {
console.trace();
reject(event.data);
}
}
};
addEventListener("message", handleMessage, {
once: true
});
this.transferableWindow = document.createElement("iframe");
this.transferableWindow.style.display = "none";
this.transferableWindow.name = location.href;
// Encode text and voice in chrome-extension: URL
this.transferableWindow.src =
`${this.url.href}?${this.params.toString()}`;
document.body.appendChild(this.transferableWindow);
this.readable = (await this.promise).pipeThrough(new TransformStream(), {
signal: this.signal,
});
if (!(this.readable instanceof ReadableStream)) {
return this.abort();
}
// AudioWorklet. The empty AudioWorkletProcessor class lets the processor
// class below parse in the page context; when its source is stringified and
// evaluated in the AudioWorkletGlobalScope, "extends AudioWorkletProcessor"
// resolves to the real AudioWorkletProcessor defined there.
class AudioWorkletProcessor {}
class ResizableArrayBufferAudioWorkletStream
extends AudioWorkletProcessor {
constructor(_options) {
super();
this.readOffset = 0;
this.writeOffset = 0;
this.endOfStream = false;
this.ab = new ArrayBuffer(0, {
maxByteLength: (1024 ** 2) * 4,
});
this.u8 = new Uint8Array(this.ab);
this.port.onmessage = (e) => {
this.readable = e.data;
this.stream();
};
}
int16ToFloat32(u16, channel) {
for (const [i, int] of u16.entries()) {
const float = int >= 0x8000 ?
-(0x10000 - int) / 0x8000 :
int / 0x7fff;
channel[i] = float;
}
}
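// Example conversions for the method above (u16 values are the s16 samples
// reinterpreted as unsigned 16-bit integers):
//   0x0000 ->  0.0
//   0x7fff ->  1.0
//   0x8000 -> -1.0            (s16 -32768)
//   0xffff -> -1 / 0x8000     (s16 -1, ~ -0.0000305)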
async stream() {
try {
for await (const u8 of this.readable) {
const {
length
} = u8;
this.ab.resize(this.ab.byteLength + length);
this.u8.set(u8, this.readOffset);
this.readOffset += length;
}
console.log("Input strean closed.");
} catch (e) {
this.ab.resize(0);
this.port.postMessage({
currentTime,
currentFrame,
readOffset: this.readOffset,
writeOffset: this.writeOffset,
e,
});
}
}
process(_, [
[output],
]) {
if (this.writeOffset > 0 && this.writeOffset >= this.readOffset) {
if (this.endOfStream === false) {
console.log("Output stream closed.");
this.endOfStream = true;
this.ab.resize(0);
this.port.postMessage({
currentTime,
currentFrame,
readOffset: this.readOffset,
writeOffset: this.writeOffset,
});
}
}
if (this.readOffset > 256 && this.writeOffset < this.readOffset) {
if (this.writeOffset === 0) {
console.log("Start output stream.");
}
const u8 = Uint8Array.from({
length: 256
},
() =>
this.writeOffset > this.readOffset ?
0 :
this.u8[this.writeOffset++],
);
const u16 = new Uint16Array(u8.buffer);
this.int16ToFloat32(u16, output);
}
return true;
}
}
// Register processor in AudioWorkletGlobalScope.
function registerProcessor(name, processorCtor) {
return `console.log(globalThis);\n${processorCtor};\n
registerProcessor('${name}', ${processorCtor.name});`
.replace(/\s+/g, " ");
}
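// The local registerProcessor() above serializes the processor class into a
// module source string, roughly (all whitespace collapsed to single spaces):
//   console.log(globalThis); class ResizableArrayBufferAudioWorkletStream
//   extends AudioWorkletProcessor { ... };
//   registerProcessor('resizable-arraybuffer-audio-worklet-stream',
//   ResizableArrayBufferAudioWorkletStream);
// which is then loaded from a Blob URL via audioWorklet.addModule() below.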
const worklet = URL.createObjectURL(
new Blob([
registerProcessor(
"resizable-arraybuffer-audio-worklet-stream",
ResizableArrayBufferAudioWorkletStream,
),
], {
type: "text/javascript"
}),
);
await this.ac.audioWorklet.addModule(
worklet,
);
try {
this.aw = new AudioWorkletNode(
this.ac,
"resizable-arraybuffer-audio-worklet-stream", {
numberOfInputs: this.numberOfInputs,
numberOfOutputs: this.numberOfOutputs,
channelCount: this.channelCount,
},
);
} catch (e) {
console.log(e);
throw e;
}
// Transfer the ReadableStream to the AudioWorkletProcessor scope.
this.aw.port.postMessage(this.readable, [this.readable]);
this.aw.connect(this.msd);
this.aw.onprocessorerror = (e) => {
console.error(e, "processorerror");
console.trace();
};
const {
resolve: result,
promise: endOfStream
} = Promise.withResolvers();
this.aw.port.onmessage = async (e) => {
this.ac.addEventListener("statechange", (event) => {
console.log(
`${event.target.constructor.name}.state ${event.target.state}`,
);
result({
...e.data,
...this.ac.playoutStats.toJSON(),
});
}, {
once: true
});
await this.ac.close();
};
return endOfStream.finally(
() => (this.removeFrame()),
);
}
};
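// The class above is the AudioWorklet variant; the class below, which
// reassigns globalThis.Piper, is the MediaStreamTrackGenerator variant of the
// same pipeline. A minimal usage sketch for the AudioWorklet variant, assuming
// "args" holds the extension's web-accessible resource URL (the
// "chrome-extension://<id>/..." value is a hypothetical placeholder):
/*
globalThis.args = "chrome-extension://<id>/transferableStream.html";
const piper = new Piper({ text: "Speech synthesis.", voice: "female" });
piper.stream()
  .then((stats) => console.log(stats)) // readOffset, writeOffset, playoutStats
  .catch(console.warn);
// piper.abort("Stream aborted."); // stop streaming and playback early
*/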
globalThis.Piper = class Piper {
constructor({
text,
voice
} = {
text: "Speech synthesis.",
voice: "male",
}) {
// Encoded into Web extension iframe URL
this.params = new URLSearchParams(Object.entries({
text,
voice
}));
// "args" (the extension's web-accessible resource URL) is expected to be
// defined in the enclosing scope before this class is constructed.
this.url = new URL(args);
// Text parameter to piper as JSON
this.text = text;
// Voice. Implemented: "male" or "female" matching en_US-hfc_${voice}-medium.onnx
this.voice = voice;
// Verify the bytes received in the arbitrary Web page are equal to the bytes
// written to the WritableStreamDefaultWriter in the extension-injected iframe.
this.bytes = 0;
// Count extra bytes of silence used to avoid clipping at the start and end of the stream.
this.extraBytes = 0;
// Web Audio API
this.latencyHint = 0;
this.frequency = 0;
this.channelCount = 1;
// Media Capture Transform MediaStreamTrackGenerator
this.kind = "audio";
// 1 channel s16 PCM, interleaved
this.sampleRate = 22050;
this.numberOfChannels = 1;
// Frames per AudioData
this.numberOfFrames = 220;
// Byte length of Uint8Array per AudioData
this.byteLength = 440;
// WebCodecs AudioData format
this.format = "s16";
// ReadableStream byte stream
this.type = "bytes";
this.timestamp = 0;
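// Worked numbers behind the constants above (mono "s16" PCM at 22050 Hz):
//   bytes per frame    = 1 channel * 2 bytes (s16)        = 2
//   numberOfFrames     = byteLength / 2 = 440 / 2          = 220
//   AudioData.duration = numberOfFrames / sampleRate
//                      = 220 / 22050 s, ~ 9977 microseconds per chunk
//   (AudioData.duration is reported in microseconds).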
// AbortController to abort streams and audio playback
this.abortable = new AbortController();
this.signal = this.abortable.signal;
// Readable byte stream
this.bytestream = new ReadableStream({
type: this.type,
start: (c) => {
// Byte stream controller
return this.bytestreamController = c;
},
});
// Readable byte stream BYOB reader
this.reader = new ReadableStreamBYOBReader(this.bytestream);
// Web Audio API BaseAudioContext
this.ac = new AudioContext({
latencyHint: this.latencyHint,
sampleRate: this.sampleRate,
});
// Verify the AudioContext state is "closed" on abort or completion; compare
// bytes in transferableStream.js with readOffset in the AudioWorkletProcessor.
this.ac.addEventListener("statechange", (e) => {
console.log(`${e.target.constructor.name}.state ${e.target.state}`);
}, {
once: true
});
this.osc = new OscillatorNode(this.ac, {
frequency: this.frequency,
channelCount: this.channelCount,
});
this.msd = new MediaStreamAudioDestinationNode(this.ac, {
channelCount: this.channelCount,
});
[this.track] = this.msd.stream.getAudioTracks();
// Get timestamp from WebCodecs AudioData produced by silence stream
// from OscillatorNode to MediaStreamAudioDestinationNode
this.processor = new MediaStreamTrackProcessor({
track: this.track,
});
// Write "s16" (S16_LE) PCM as Uint8Array to MediaStreamTrackGenerator writable
this.generator = new MediaStreamTrackGenerator({
kind: this.kind,
});
this.audioWriter = this.generator.writable.getWriter();
this.mediaStream = new MediaStream([this.generator]);
this.msn = new MediaStreamAudioSourceNode(this.ac, {
mediaStream: this.mediaStream,
});
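// Playback graph once stream() runs: OscillatorNode (silence, used only to
// obtain timestamps) -> MediaStreamAudioDestinationNode ->
// MediaStreamTrackProcessor; piper PCM -> AudioData ->
// MediaStreamTrackGenerator -> MediaStream -> MediaStreamAudioSourceNode ->
// AudioContext.destination.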
}
// Remove the iframe when streaming completes, the stream is aborted, or an
// error or exception occurs.
removeFrame() {
document.querySelectorAll(`[src*="${this.url.origin}"]`)
.forEach((iframe) => {
document.body.removeChild(iframe);
});
this.transferableWindow = null;
}
abort(reason = "Stream aborted.") {
this.abortable.abort(reason);
}
async stream() {
// Use a Web extension "web_accessible_resources" iframe to communicate with
// arbitrary Web pages, in both directions, using Transferable Streams.
const {
resolve,
reject,
promise
} = Promise.withResolvers();
this.osc.connect(this.msd);
this.msn.connect(this.ac.destination);
this.promise = promise;
const handleMessage = (event) => {
if (event.origin === this.url.origin) {
// If event.data is ReadableStream pass ReadableStream
// and function to remove iframe from Web page when stream completes
if (event.data instanceof ReadableStream) {
resolve(event.data);
} else {
console.trace();
reject(event.data);
}
}
};
addEventListener("message", handleMessage, {
once: true
});
this.transferableWindow = document.createElement("iframe");
this.transferableWindow.style.display = "none";
this.transferableWindow.name = location.href;
// Encode text and voice in chrome-extension: URL
this.transferableWindow.src =
`${this.url.href}?${this.params.toString()}`;
document.body.appendChild(this.transferableWindow);
this.readable = await this.promise;
if (!(this.readable instanceof ReadableStream)) {
return this.abort();
}
return await Promise.allSettled([
this.readable.pipeTo(
new WritableStream({
write: (u8) => {
this.bytes += u8.length;
this.bytestreamController.enqueue(u8);
},
close: () => {
this.bytestreamController.close();
// (this.generator.stats.toJSON().totalFrames/2)-this.extraBytes
console.log("Input stream closed.");
},
// Verify abort reason propagates.
abort: async (reason) => {
console.log({
reason,
});
this.bytestreamController.close();
await this.audioWriter.close();
},
}), {
signal: this.signal,
},
).then(() => this.generator.stats.toJSON()),
this.processor.readable.pipeTo(
new WritableStream({
start: async () => {
// Avoid clipping of the initial MediaStreamTrack playback by writing
// silence before playback begins.
let silence = new AudioData({
sampleRate: this.sampleRate,
numberOfChannels: this.numberOfChannels,
numberOfFrames: this.numberOfFrames,
format: this.format,
timestamp: this.timestamp,
data: new Uint8Array(this.byteLength),
});
this.timestamp += silence.duration;
// console.log(silence.duration/10**6);
await this.audioWriter.write(silence);
// Count extra bytes used to insert silence at start, end of stream.
this.extraBytes += this.byteLength * 2;
console.log("Start output stream.");
},
write: async (audioData, c) => {
// Get the timestamp from the AudioData stream of silence produced by the
// OscillatorNode connected to the MediaStreamAudioDestinationNode, read
// through MediaStreamTrackProcessor.
// Manually incrementing the timestamp, e.g.
//   basetime = 0; timestamp: basetime * 10**6; basetime += audioData.duration;
// while accounting for latency and asynchronous processing to create a
// WebCodecs AudioData timestamp for a live MediaStreamTrack is non-trivial.
const {
timestamp
} = audioData;
let {
value: data,
done
} = await this.reader.read(
new Uint8Array(this.byteLength), {
min: this.byteLength,
},
);
// Avoid clipping: pad the last AudioData with silence
// when the final chunk is shorter than byteLength (440 bytes).
if (data?.length < this.byteLength) {
this.extraBytes += this.byteLength - data.length;
const u8 = new Uint8Array(this.byteLength);
u8.set(data, 0);
data = u8;
}
// console.log(audioWriter.desiredSize, done);
if (done) {
// Stop MediaStreamTrack of MediaStreamAudioDestinationNode
// and close MediaStreamTrackGenerator WritableStreamDefaultWriter.
// Delay track.stop() for 100 milliseconds to avoid clipping
// end of audio playback.
if (this.signal.aborted) {
this.track.stop();
return c.error(this.signal.reason);
}
await this.audioWriter.close();
return await scheduler.postTask(() => this.track.stop(), {
priority: "background",
delay: 100,
});
}
if (this.signal.aborted) {
return;
}
await this.audioWriter.ready;
const ad = new AudioData({
sampleRate: this.sampleRate,
numberOfChannels: this.numberOfChannels,
// data.buffer.byteLength / 2,
numberOfFrames: this.numberOfFrames,
format: this.format,
timestamp: this.timestamp * 10 ** 6,
data,
});
this.timestamp += ad.duration;
// Write Uint8Array representation of 1 channel S16 PCM
await this.audioWriter.write(
ad,
).catch((e) => {
console.warn(e);
});
},
close: () => {
console.log("Output stream closed.");
// this.track.stop();
// Remove the Web extension-injected HTML iframe, which is used to message
// data from piper (via the Native Messaging protocol) into a TransformStream
// whose readable side is transferred to the Web page and read.
this.removeFrame();
},
// Handle this.abortable.abort("reason");
abort(reason) {
console.log(reason);
},
}),
).then(() => ({
bytes: this.bytes,
extraBytes: this.extraBytes,
})),
]).finally(() =>
Promise.all([
new Promise(async (resolve) => {
this.ac.addEventListener("statechange", (event) => {
console.log(
`${event.target.constructor.name}.state ${event.target.state}`,
);
resolve();
}, {
once: true
});
await this.ac.close();
}),
this.removeFrame(),
])
);
}
};
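// A minimal usage sketch for the MediaStreamTrackGenerator variant above, under
// the same assumptions as the sketch after the first class ("args" defined,
// hypothetical extension URL). stream() resolves with the Promise.allSettled()
// results of the two pipeTo() chains (generator stats and byte counts).
/*
globalThis.args = "chrome-extension://<id>/transferableStream.html";
const piper = new Piper({ text: "Hello from piper.", voice: "male" });
piper.stream()
  .then((results) => console.log(results))
  .catch(console.warn);
// piper.abort("Stream aborted."); // propagates to the WritableStream abort handlers
*/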