Skip to content

Instantly share code, notes, and snippets.

@antonmihaylov
Created December 1, 2022 11:17
Show Gist options
  • Save antonmihaylov/a708b7e93e71513a048d6615098198ea to your computer and use it in GitHub Desktop.
Save antonmihaylov/a708b7e93e71513a048d6615098198ea to your computer and use it in GitHub Desktop.
import { EventEmitter } from "eventemitter3";
import type { MP4ArrayBuffer, MP4File, MP4Info, MP4Sample } from "mp4box";
import * as MP4Box from "mp4box";
import type { MediaFile } from "../../models.js";
import type { IDisposable } from "../abstract/hooks.js";
import type {
VideoMetadata,
} from "../abstract/pipeline.js";
import type { MediaFileObjectUrlProvider } from "../utils/ObjectUrlProvider.js";
// Some internal dependencies are omitted because they are not relevant
// MediaFile is a wrapper for a native File: {file: File, id: string}
// IDisposable defines a dispose() method
// VideoMetadata - you can see its definition below, in emit("videoMetadata", ...)
// MediaFileObjectUrlProvider - uses URL.createObjectURL, caches it and finally disposes it when dispose() is called on it
// Event map used to type the demuxer's EventEmitter: each key is an event name
// and each tuple lists the arguments passed to that event's listeners.
export interface DemuxerEventTypes {
// Decoder configuration for the demuxed video track.
config: [VideoDecoderConfig];
// One encoded chunk per demuxed sample.
chunk: [EncodedVideoChunk];
// Summary information about the video (see emit("videoMetadata", ...)).
videoMetadata: [VideoMetadata];
// A failure reported while fetching or demuxing the file.
error: [Error];
// Emitted without arguments once all samples have been emitted as chunks.
end: [];
}
// A demuxer is an event emitter (see DemuxerEventTypes) that begins working on
// a media file when start() is called. Listeners should be attached before
// calling start().
export interface IDemuxer extends EventEmitter<DemuxerEventTypes> {
start: (mediaFile: MediaFile) => void;
}
// Underlying sink for a WritableStream that forwards every written chunk to an
// MP4Box file, tagging each buffer with its position in the overall stream.
class MP4FileSink implements UnderlyingSink<Uint8Array> {
  // Number of bytes handed to MP4Box so far, i.e. the file position of the
  // next chunk.
  private offset = 0;

  constructor(private readonly file: MP4File) {}

  // Copies the chunk into a standalone ArrayBuffer and appends it to the file.
  write(chunk: Uint8Array) {
    // MP4Box.js wants a plain ArrayBuffer; constructing a Uint8Array from
    // another typed array copies the bytes into a fresh, exactly-sized buffer.
    const copy = new Uint8Array(chunk);
    const buffer = copy.buffer as MP4ArrayBuffer;

    // Tell MP4Box where in the file these bytes belong, then advance.
    const start = this.offset;
    buffer.fileStart = start;
    this.offset = start + buffer.byteLength;

    this.file.appendBuffer(buffer);
  }

  // Called when the stream ends: lets MP4Box process whatever is buffered.
  close() {
    this.file.flush();
  }
}
// A single SPS/PPS entry of an `avcC` box, as exposed by MP4Box.js.
interface AvcCParameterSet {
  length: number;
  nalu: ArrayLike<number>;
}

// The fields of an MP4Box.js `avcC` box that `Mp4Demuxer.description()` reads.
interface AvcCBox {
  configurationVersion: number;
  AVCProfileIndication: number;
  profile_compatibility: number;
  AVCLevelIndication: number;
  lengthSizeMinusOne: number;
  nb_SPS_nalus: number;
  nb_PPS_nalus: number;
  SPS: Array<AvcCParameterSet>;
  PPS: Array<AvcCParameterSet>;
}

// The part of an MP4Box.js `trak` box needed to locate its `avcC` box.
interface AvcCTrak {
  tkhd?: { track_id?: number };
  mdia: { minf: { stbl: { stsd: { entries: Array<{ avcC?: AvcCBox }> } } } };
}

// Demuxes the first video track of an MP4 file using MP4Box and reports the
// results as events (see DemuxerEventTypes):
//   - "config":        a VideoDecoderConfig for the track,
//   - "videoMetadata": fps / duration / dimensions / codec,
//   - "chunk":         one EncodedVideoChunk per demuxed sample,
//   - "end":           after the last sample of the track was emitted,
//   - "error":         when fetching or demuxing fails.
export class Mp4Demuxer
  extends EventEmitter<DemuxerEventTypes>
  implements IDemuxer, IDisposable
{
  private file?: MP4File;
  // Number of samples the video track announces; set in onReady().
  private totalSamplesCount?: number;
  // Number of samples emitted as chunks so far.
  private currentSamplesCount = 0;

  constructor(
    private readonly mediaFileUrlProvider: MediaFileObjectUrlProvider
  ) {
    super();
  }

  // Starts fetching and demuxing `mediaFile`. Attach listeners before calling.
  // Throws if the demuxer has already been started and not disposed since.
  start(mediaFile: MediaFile) {
    if (this.file) {
      throw new Error("start() called twice");
    }
    this.totalSamplesCount = undefined;
    this.currentSamplesCount = 0;

    // Configure an MP4Box File for demuxing.
    this.file = MP4Box.createFile();
    this.file.onError = (error) =>
      this.emit("error", new Error(`MP4Box: ${error}`));
    this.file.onReady = this.onReady.bind(this);
    this.file.onSamples = this.onSamples.bind(this);

    // Fetch the file and pipe the data through.
    const fileSink = new MP4FileSink(this.file);
    const url = this.mediaFileUrlProvider.get(mediaFile);
    fetch(url)
      .then(async (response) => {
        if (!response.body) {
          throw new Error("No response body");
        }
        // highWaterMark should be large enough for smooth streaming, but lower is
        // better for memory usage.
        return response.body.pipeTo(
          new WritableStream(fileSink, { highWaterMark: 2 })
        );
      })
      .catch((e: unknown) => {
        console.error(e);
        this.emit("error", e instanceof Error ? e : new Error(String(e)));
      });
  }

  // Stops sample processing, drops the MP4Box file and removes every listener.
  dispose() {
    this.file?.stop();
    this.file = undefined;
    this.removeAllListeners();
  }

  // Builds the `description` for VideoDecoder.configure(): the content of the
  // track's `avcC` box serialized as an AVCDecoderConfigurationRecord.
  // Based on a function written by Thomas Guilbert from Google.
  //
  // `trackId` selects the trak whose `avcC` box is used; when it is omitted or
  // no trak has that id, the first trak of the file is used.
  // Throws if the file is not initialized, if the chosen trak has no `avcC`
  // box (only H.264/AVC sample entries are handled here), or if the number of
  // bytes written differs from the computed size.
  private description(trackId?: number): Uint8Array {
    const file = this.file;
    if (!file) {
      throw new Error("MP4Box file not initialized");
    }
    // @ts-expect-error - MP4Box.js types are incomplete.
    // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access,@typescript-eslint/no-unsafe-assignment
    const traks: Array<AvcCTrak> = file.moov.traks;
    const selected: AvcCTrak | undefined =
      trackId === undefined
        ? undefined
        : traks.find((candidate) => candidate.tkhd?.track_id === trackId);
    const trak: AvcCTrak | undefined = selected ?? traks[0];
    const avccBox = trak?.mdia.minf.stbl.stsd.entries.find(
      (entry) => entry.avcC
    )?.avcC;
    if (!avccBox) {
      throw new Error("No avcC box found for the video track");
    }

    // 6 header bytes + 1 byte PPS count, plus a 2-byte length prefix and the
    // payload for every parameter set.
    const parameterSetsSize = (sets: Array<AvcCParameterSet>) =>
      sets.reduce((total, set) => total + 2 + set.length, 0);
    const size =
      7 + parameterSetsSize(avccBox.SPS) + parameterSetsSize(avccBox.PPS);

    const data = new Uint8Array(size);
    let offset = 0;
    const writeUint8 = (value: number) => {
      data[offset] = value;
      offset += 1;
    };
    // Big-endian 16-bit write, independent of the platform's byte order.
    const writeUint16 = (value: number) => {
      data[offset] = (value >> 8) & 0xff;
      data[offset + 1] = value & 0xff;
      offset += 2;
    };
    const writeBytes = (bytes: ArrayLike<number>) => {
      data.set(bytes, offset);
      offset += bytes.length;
    };

    writeUint8(avccBox.configurationVersion);
    writeUint8(avccBox.AVCProfileIndication);
    writeUint8(avccBox.profile_compatibility);
    writeUint8(avccBox.AVCLevelIndication);
    // The upper 6 bits of this byte are written as ones.
    writeUint8(avccBox.lengthSizeMinusOne + (63 << 2));
    // The upper 3 bits of this byte are written as ones.
    writeUint8(avccBox.nb_SPS_nalus + (7 << 5));
    for (const sps of avccBox.SPS) {
      writeUint16(sps.length);
      writeBytes(sps.nalu);
    }
    writeUint8(avccBox.nb_PPS_nalus);
    for (const pps of avccBox.PPS) {
      writeUint16(pps.length);
      writeBytes(pps.nalu);
    }

    if (offset !== size) throw new Error("size mismatched !");
    return data;
  }

  // MP4Box callback, invoked once the file's metadata has been parsed. Emits
  // "config" and "videoMetadata" for the first video track and then starts
  // sample extraction. Emits "error" (and does not start extraction) when
  // there is no video track or its decoder description cannot be built.
  private onReady(info: MP4Info) {
    if (!this.file) {
      throw new Error("MP4Box file not initialized");
    }
    const track = info.videoTracks[0];
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (!track) {
      this.emit("error", new Error("No video tracks found"));
      return;
    }

    let description: Uint8Array;
    try {
      description = this.description(track.id);
    } catch (e: unknown) {
      this.emit("error", e instanceof Error ? e : new Error(String(e)));
      return;
    }

    this.totalSamplesCount = track.nb_samples;

    // Generate and emit an appropriate VideoDecoderConfig.
    this.emit("config", {
      codec: track.codec,
      codedHeight: track.video.height,
      codedWidth: track.video.width,
      description,
    } as VideoDecoderConfig);

    // Average frame rate over the whole file, rounded up.
    const durationSec = info.duration / info.timescale;
    const fps = Math.ceil(1 / (durationSec / track.nb_samples));
    this.emit("videoMetadata", {
      fps,
      durationSec,
      height: track.video.height,
      width: track.video.width,
      codec: track.codec,
    } as VideoMetadata);

    // Start demuxing.
    this.file.setExtractionOptions(track.id, null, {
      nbSamples: track.nb_samples,
    });
    this.file.start();
  }

  // MP4Box callback, invoked with batches of demuxed samples. Emits one
  // "chunk" per sample (timestamps and durations converted from track
  // timescale units to microseconds) and "end" once the number of emitted
  // samples reaches the track's announced sample count.
  // Runs synchronously, so an exception thrown by a listener propagates to
  // the MP4Box call that delivered the samples.
  private onSamples(
    trackId: number,
    ref: unknown,
    samples: Array<MP4Sample>
  ): void {
    for (const sample of samples) {
      this.emit(
        "chunk",
        new EncodedVideoChunk({
          type: sample.is_sync ? "key" : "delta",
          timestamp: 1000000 * (sample.cts / sample.timescale),
          duration: 1000000 * (sample.duration / sample.timescale),
          data: sample.data,
        })
      );
      this.currentSamplesCount++;
      if (this.currentSamplesCount === this.totalSamplesCount) {
        this.emit("end");
      }
    }
  }
}
import type { MediaFile } from "../../../models.js";
import type { PerfMonitor } from "../../utils/PerfMonitor.js";
import { frameToMs, msToMcs } from "../../utils/timing.js";
// Asynchronously provides the demuxer to use for a given media file.
export interface IDemuxerFactory {
getDemuxer: (mediaFile: MediaFile) => Promise<IDemuxer>;
}
// Contract for decoders that produce per-frame data of type TDecodeOut.
// Every method may be implemented synchronously or asynchronously.
export interface IDecoder<TDecodeOut> {
// Prepares the decoder for the given media file.
initMediaFile: (
mediaFile: MediaFile,
context: InitContext
) => void | Promise<void>;
// Returns the data for the frame described by `context`, or null if there is none.
getFrameData: (
mediaFile: MediaFile,
context: DecodeContext
) => Promise<TDecodeOut | null> | TDecodeOut | null;
// Reports whether this decoder can handle the given media file.
canDecode: (mediaFile: MediaFile) => boolean | Promise<boolean>;
}
// Settings handed to IDecoder.initMediaFile.
// NOTE(review): the names suggest output dimensions, frame rate and frame
// count of the export; units are not visible here - confirm with the caller.
export interface InitContext {
outputWidth: number;
outputHeight: number;
fps: number;
totalFrames: number;
}
// InitContext extended with the frame a request refers to.
export interface BaseExportContext extends InitContext {
// NOTE(review): presumably a frame index; whether it is 0- or 1-based is
// decided by frameToMs, which is not visible here.
frame: number;
}
// Context passed to IDecoder.getFrameData; currently adds nothing to BaseExportContext.
export interface DecodeContext extends BaseExportContext {}
/**
 * Uses MP4Box.js to demux the video and WebCodecs to decode it.
 *
 * `initMediaFile()` decodes the whole file up front and keeps every decoded
 * `VideoFrame`; `getFrameData()` then looks a frame up by timestamp.
 */
export class WebCodecsDecoder implements IDecoder<VideoFrame> {
  /** Maximum distance (in microseconds) between a requested time and a frame's timestamp. */
  private static readonly MIN_FRAME_TIME_DIFF_MCS = 15 * 1000;

  /** Decoded frames in output order; undefined until the first frame has been decoded. */
  private frames?: Array<VideoFrame>;

  /**
   * @param demuxerFactory Supplies the demuxer for each media file.
   * @param optimizeForLatency Forwarded to `VideoDecoder.configure()`.
   * @param perf Optional performance monitor (e.g. the project's PerfMonitor);
   *   when given, each `getFrameData()` call is measured through it.
   */
  constructor(
    private readonly demuxerFactory: IDemuxerFactory,
    private readonly optimizeForLatency: boolean,
    private readonly perf?: {
      start(label: string): { end(): void } | undefined;
    }
  ) {}

  /**
   * Demuxes and decodes the whole media file, collecting the decoded frames.
   *
   * Resolves once the demuxer reported "end" and the decoder has been flushed.
   * Rejects when the demuxer emits "error" or the decoder reports an error.
   * The decoder is closed in either case.
   */
  async initMediaFile(mediaFile: MediaFile, context: InitContext) {
    const demuxer = await this.demuxerFactory.getDemuxer(mediaFile);

    // Set inside the promise below so decoder errors can reject it.
    let failInit: ((reason: unknown) => void) | undefined;

    const decoder = new VideoDecoder({
      output: (videoFrame) => {
        // Frames without a timestamp cannot be looked up later; release them.
        // A timestamp of 0 is valid (it is the first frame) and must be kept.
        if (videoFrame.timestamp == null) {
          videoFrame.close();
          return;
        }
        this.frames ??= [];
        this.frames.push(videoFrame);
      },
      error: (e) => {
        console.error(e);
        failInit?.(e);
      },
    });

    try {
      await new Promise<void>((resolve, reject) => {
        failInit = reject;
        demuxer.addListener("config", (config) =>
          decoder.configure({
            ...config,
            optimizeForLatency: this.optimizeForLatency,
          })
        );
        demuxer.addListener("chunk", (chunk) => decoder.decode(chunk));
        demuxer.addListener("end", () => resolve());
        demuxer.addListener("error", (e) => reject(e));
        demuxer.start(mediaFile);
      });
      await decoder.flush();
    } finally {
      // close() throws on an already closed decoder, and a decoder that
      // reported an error has been closed by the browser.
      if (decoder.state !== "closed") {
        decoder.close();
      }
    }
  }

  /** Only files whose MIME type is exactly "video/mp4" are supported. */
  async canDecode(mediaFile: MediaFile): Promise<boolean> {
    return mediaFile.file.type === "video/mp4";
  }

  /**
   * Returns the first decoded frame whose timestamp is less than
   * `MIN_FRAME_TIME_DIFF_MCS` away from the time of `context.frame`, or null
   * when no such frame exists.
   *
   * @throws Error if no frame has been decoded yet.
   */
  async getFrameData(
    mediaFile: MediaFile,
    context: DecodeContext
  ): Promise<VideoFrame | null> {
    const perfEntry = this.perf?.start("WebCodecsDecoder.getFrameData");
    try {
      const timeMcs = msToMcs(frameToMs(context));
      if (!this.frames) {
        throw new Error("Decoder not initialized");
      }
      const match = this.frames.find((candidate) => {
        // Frames without a timestamp are never stored, so `!` is safe here.
        const diff = Math.abs(candidate.timestamp! - timeMcs);
        return diff < WebCodecsDecoder.MIN_FRAME_TIME_DIFF_MCS;
      });
      return match ?? null;
    } finally {
      // End the measurement even when the lookup throws.
      perfEntry?.end();
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment