Skip to content

Instantly share code, notes, and snippets.

@unarist
Last active February 7, 2021 18:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unarist/bf2d9d9960d900107cb27e821242be0d to your computer and use it in GitHub Desktop.
Save unarist/bf2d9d9960d900107cb27e821242be0d to your computer and use it in GitHub Desktop.
DenoでEBMLパース
import { DOMParser, Element } from "https://deno.land/x/deno_dom@v0.1.3-alpha2/deno-dom-wasm.ts";
// Built-in definitions for the EBML header elements and the two global elements
// (CRC-32 and Void). Each entry gives the element's schema path, its EBML ID
// (including the VINT marker bits) and its value type.
// https://github.com/ietf-wg-cellar/ebml-specification/blob/master/specification.markdown#ebml-header-elements
// Declared with `let` because the table is later extended with fetched schema definitions.
let elementDefs = [
  { path: "\\EBML", id: 0x1A45DFA3, type: "master" },
  { path: "\\EBML\\EBMLVersion", id: 0x4286, type: "uinteger" },
  { path: "\\EBML\\EBMLReadVersion", id: 0x42F7, type: "uinteger" },
  { path: "\\EBML\\EBMLMaxIDLength", id: 0x42F2, type: "uinteger" },
  { path: "\\EBML\\EBMLMaxSizeLength", id: 0x42F3, type: "uinteger" },
  { path: "\\EBML\\DocType", id: 0x4282, type: "string" },
  { path: "\\EBML\\DocTypeVersion", id: 0x4287, type: "uinteger" },
  { path: "\\EBML\\DocTypeReadVersion", id: 0x4285, type: "uinteger" },
  { path: "\\EBML\\DocTypeExtension", id: 0x4281, type: "master" },
  // The two elements below are children of the DocTypeExtension master element,
  // so their paths must include it.
  { path: "\\EBML\\DocTypeExtension\\DocTypeExtensionName", id: 0x4283, type: "string" },
  { path: "\\EBML\\DocTypeExtension\\DocTypeExtensionVersion", id: 0x4284, type: "uinteger" },
  // Global elements: may appear under any master element.
  { path: "\\(1-\\)CRC-32", id: 0xBF, type: "binary" },
  { path: "\\(-\\)Void", id: 0xEC, type: "binary" }
];
/**
 * Downloads an EBML schema document and extracts one definition per `<element>` tag.
 *
 * @param schemaUrl URL of the schema XML to download.
 * @returns An array of `{ path, id, type }` objects built from each element's
 *          `path`, `id` (converted with `Number`) and `type` attributes.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchElementDefs(schemaUrl: string) {
  const response = await fetch(schemaUrl);
  if (!response.ok) {
    throw new Error(`Fetch failed: ${response.status}`);
  }
  const schemaText = await response.text();
  // The document is handed to the parser as "text/html".
  const parsed = new DOMParser().parseFromString(schemaText, "text/html")!;
  return [...parsed.querySelectorAll("element")].map((node) => {
    const attrs = (node as Element).attributes;
    return { path: attrs.path, id: Number(attrs.id), type: attrs.type };
  });
}
// Append the element definitions fetched from the Matroska schema XML to the built-in table.
elementDefs = elementDefs.concat(await fetchElementDefs("https://github.com/ietf-wg-cellar/matroska-specification/raw/master/ebml_matroska.xml"));
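// memo: 4 bytes is enough for about a month at a TimecodeScale of 1 ms, and for 2 GB as an offset/size
// MAX_SAFE_INTEGER is a little over 6 bytes
// According to Matroska's EBML Schema, IDs are up to 4 bytes and sizes up to 8 bytes
// ...or so I thought, but it turns out the 8-byte maximum is used routinely...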
/**
 * DataView with helpers for the big-endian, variable-width values used by EBML:
 * 0-8 byte integers, 0/4/8 byte floats, strings and VINTs (variable-size integers).
 *
 * Every multi-byte value is read and written big-endian (the DataView default).
 * Values are exchanged as plain `number`s, so anything above
 * Number.MAX_SAFE_INTEGER is rejected where it can be detected.
 */
class DataViewEx extends DataView {
  textDecoder = new TextDecoder("utf-8")

  constructor(buffer: ArrayBuffer) {
    super(buffer);
  }

  /** Reads `size` (1-8) bytes at `offset` as one big-endian unsigned bigint. */
  private readBigUint(offset: number, size: number): bigint {
    let acc = 0n;
    for (let i = 0; i < size; i++) {
      acc = (acc << 8n) | BigInt(this.getUint8(offset + i));
    }
    return acc;
  }

  /**
   * Reads a big-endian signed integer of `size` bytes (0-8).
   * A zero-length integer is the value 0, as defined by EBML.
   * @throws Error for an unsupported size or a value outside the safe integer range.
   */
  getInt(offset: number, size: number) {
    switch (size) {
      case 0: return 0;
      case 1: return this.getInt8(offset);
      case 2: return this.getInt16(offset);
      // The signed upper 16 bits carry the sign; the low byte is added unsigned.
      case 3: return (this.getInt16(offset) << 8) + this.getUint8(offset + 2);
      case 4: return this.getInt32(offset);
      case 5:
      case 6:
      case 7:
      case 8: {
        // Too wide for 32-bit operators: assemble as bigint and sign-extend.
        const wide = BigInt.asIntN(size * 8, this.readBigUint(offset, size));
        if (wide > Number.MAX_SAFE_INTEGER || wide < Number.MIN_SAFE_INTEGER)
          throw new Error(`Unsupported integer (${wide} outside the safe integer range) at offset 0x${offset.toString(16)}`);
        return Number(wide);
      }
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads a big-endian unsigned integer of `size` bytes (0-8).
   * A zero-length integer is the value 0, as defined by EBML.
   * @throws Error for an unsupported size, or for an 8-byte value above Number.MAX_SAFE_INTEGER.
   */
  getUint(offset: number, size: number) {
    switch (size) {
      case 0: return 0;
      case 1: return this.getUint8(offset);
      case 2: return this.getUint16(offset);
      case 3: return (this.getUint16(offset) << 8) + this.getUint8(offset + 2);
      case 4: return this.getUint32(offset);
      // Multiplication instead of `<<` because shifts are limited to 32 bits.
      case 5: return (this.getUint32(offset) * 0x100) + this.getUint8(offset + 4);
      case 6: return (this.getUint32(offset) * 0x10000) + this.getUint16(offset + 4);
      // NOTE: a 7-byte value can reach 2^56 and lose precision above 2^53.
      case 7: return (this.getUint32(offset) * 0x1000000) + (this.getUint32(offset + 3) & 0xFFFFFF);
      case 8: {
        const wide = this.getBigUint64(offset);
        if (wide > Number.MAX_SAFE_INTEGER)
          throw new Error(`${wide} > Number.MAX_SAFE_INTEGER at offset 0x${offset.toString(16)}`);
        return Number(wide);
      }
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Writes `value` as a big-endian unsigned integer of `size` bytes (1-8).
   * Negative values wrap modulo 2^(8*size), so -1 writes all-ones bytes for every size.
   * @throws Error for an unsupported size, or for an 8-byte value above Number.MAX_SAFE_INTEGER.
   */
  setUint(offset: number, size: number, value: number) {
    switch (size) {
      case 1: return this.setUint8(offset, value);
      case 2: return this.setUint16(offset, value);
      case 3: return this.setUint16(offset, value >>> 8), this.setUint8(offset + 2, value);
      case 4: return this.setUint32(offset, value);
      // Math.floor (not the implicit truncation of setUint32) so that negative
      // values sign-extend into the upper bytes like the other sizes do.
      case 5: return this.setUint32(offset, Math.floor(value / 0x100)), this.setUint8(offset + 4, value);
      case 6: return this.setUint32(offset, Math.floor(value / 0x10000)), this.setUint16(offset + 4, value);
      // The two 32-bit writes overlap on byte offset+3; both write the same bits there.
      case 7: return this.setUint32(offset, Math.floor(value / 0x1000000)), this.setUint32(offset + 3, value);
      case 8:
        if (value > Number.MAX_SAFE_INTEGER) throw new Error(`${value} > Number.MAX_SAFE_INTEGER at offset 0x${offset.toString(16)}`);
        else return this.setBigUint64(offset, BigInt(value));
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads a big-endian float of `size` bytes (0, 4 or 8).
   * A zero-length float is the value 0, as defined by EBML.
   * @throws Error for any other size.
   */
  getFloat(offset: number, size: number) {
    switch (size) {
      case 0: return 0;
      case 4: return this.getFloat32(offset);
      case 8: return this.getFloat64(offset);
      default: throw new Error(`Unsupported size ${size} at offset 0x${offset.toString(16)}`);
    }
  }

  /**
   * Reads a VINT starting at `offset`.
   *
   * @param raw When true the marker bit is kept in the value (used for element IDs);
   *            when false only the data bits are returned (used for sizes).
   * @returns `[width, value]`, where `width` is the number of bytes occupied.
   *          With `raw` false, a VINT whose data bits are all 1 yields
   *          `Number.POSITIVE_INFINITY`. The 8-octet all-ones pattern yields
   *          infinity regardless of `raw`.
   * @throws Error when an 8-octet value exceeds Number.MAX_SAFE_INTEGER.
   */
  getVINT(offset: number, raw: boolean = false): [number, number] {
    const leading = this.getUint8(offset);
    // Removes the marker bit. Uses `%` rather than `&` because bitwise operators
    // truncate their operands to 32 bits, which corrupts 5-7 byte VINTs.
    // All bits above the marker are zero, so `value % (dataMask + 1)` is exactly the data bits.
    const extract = (value: number, dataMask: number) => {
      if (raw) return value;
      const data = value % (dataMask + 1);
      return data === dataMask ? Number.POSITIVE_INFINITY : data;
    };
    if (leading & 0x80) return [1, extract(leading, 0x7F)];
    if (leading & 0x40) return [2, extract(this.getUint(offset, 2), 0x3FFF)];
    if (leading & 0x20) return [3, extract(this.getUint(offset, 3), 0x1FFFFF)];
    if (leading & 0x10) return [4, extract(this.getUint(offset, 4), 0x0FFFFFFF)];
    if (leading & 0x08) return [5, extract(this.getUint(offset, 5), 0x07FFFFFFFF)];
    if (leading & 0x04) return [6, extract(this.getUint(offset, 6), 0x03FFFFFFFFFF)];
    if (leading & 0x02) return [7, extract(this.getUint(offset, 7), 0x01FFFFFFFFFFFF)];
    // Tentative support for 8 octets.
    // NOTE(review): a leading byte of 0x00 (no marker in the first octet) also
    // ends up here and is read as if it were an 8-octet VINT.
    const bigint = this.getBigUint64(offset);
    if (bigint === 0x01FFFFFFFFFFFFFFn) return [8, Number.POSITIVE_INFINITY];
    const result_bigint = raw ? bigint : bigint & 0x00FFFFFFFFFFFFFFn;
    if (result_bigint > Number.MAX_SAFE_INTEGER)
      throw new Error(`Unsupported VINT (${bigint.toString(16)}>MAX_SAFE_INTEGER) at offset 0x${offset.toString(16)}`);
    return [8, Number(result_bigint)];
  }

  /**
   * Writes `value` as a VINT occupying exactly `width` bytes at `offset`.
   * Passing -1 writes all data bits as 1.
   */
  setVINT(offset: number, value: number, width: number) {
    this.setUint(offset, width, value);
    const vint_marker = 1 << (8 - width);
    // Clear everything at or above the marker position in the first byte, then set the marker.
    this.setUint8(offset, (this.getUint8(offset) & (vint_marker - 1)) | vint_marker);
  }

  /** Decodes `size` bytes starting at `offset` as UTF-8 text. */
  getString(offset: number, size: number) {
    return this.textDecoder.decode(this.buffer.slice(offset, offset + size));
  }
}
/**
 * Walks the EBML elements stored in `arr` and yields one entry per element.
 *
 * Elements whose definition has type "master" are descended into (their
 * children are yielded next); every other element, including unknown ones, is
 * skipped over by its declared size.
 *
 * Each yielded entry exposes the element's `position`, raw `id` (marker bits
 * included), `id_len`, `size`, `size_len`, decoded `value` (undefined for
 * unknown, master and binary elements), its `def`, and three actions:
 *  - `makeVoid()` overwrites the element in place with a Void element of the same total length,
 *  - `makeUnknownSize()` rewrites the size field with all data bits set,
 *  - `stepOver()` makes the visitor skip the element's content even if it is a master.
 *
 * @param arr Buffer holding the EBML document; it is modified in place by the actions above.
 */
function* createVisitor(arr: ArrayBuffer) {
  const view = new DataViewEx(arr);

  // Index the definitions by ID once instead of scanning the list for every
  // element. The first definition wins, matching Array.prototype.find.
  const defsById = new Map<number, (typeof elementDefs)[number]>();
  for (const candidate of elementDefs) {
    if (!defsById.has(candidate.id)) defsById.set(candidate.id, candidate);
  }

  // Drops NUL padding: everything from the first NUL octet to the end,
  // including a lone trailing NUL and any line breaks after it.
  const stripNulPadding = (text: string) => text.replace(/\x00[\s\S]*$/, "");

  const readValue = (type: string, pos: number, size: number) => {
    switch (type) {
      case "uinteger": return view.getUint(pos, size);
      case "integer": return view.getInt(pos, size);
      case "string": return stripNulPadding(view.getString(pos, size));
      case "utf-8": return stripNulPadding(view.getString(pos, size));
      case "float": return view.getFloat(pos, size);
      default: return undefined;
    }
  };

  // Decodes the header at the start of a SimpleBlock payload:
  // track number (VINT), 16-bit signed relative timestamp, flags byte.
  const readSimpleBlockStructure = (pos: number) => {
    const [track_len, track] = view.getVINT(pos);
    const relativeTs = view.getInt16(pos + track_len);
    const flags = view.getUint8(pos + track_len + 2);
    return { trackNo: track, relativeTs, keyFrame: !!(flags & 0x80) };
  };

  let position = 0;
  while (position < view.byteLength) {
    const [id_len, id] = view.getVINT(position, true);
    const [size_len, size] = view.getVINT(position + id_len);
    const def = defsById.get(id);
    const dataPos = position + id_len + size_len;
    const value = !def ? undefined
      : def.id === 0xA3 ? readSimpleBlockStructure(dataPos)
      : readValue(def.type, dataPos, size);

    // Set by the consumer through entry.stepOver() while the generator is suspended.
    let skipContent = false;
    yield {
      position,
      id,
      id_len,
      size,
      size_len,
      value,
      def,
      makeVoid(): void {
        if (!Number.isFinite(this.size))
          throw new Error(`Cannot void an element of unknown size at offset 0x${this.position.toString(16)}`);
        view.setUint8(this.position, 0xEC /*Void*/);
        // Bytes left after the 1-byte Void ID; they hold the size field plus the payload.
        const len_with_size = this.id_len + this.size_len + this.size - 1;
        // A 1-byte size field can express payloads up to 126 bytes (127 means unknown size).
        if (len_with_size <= 127) view.setVINT(this.position + 1, len_with_size - 1, 1);
        else view.setVINT(this.position + 1, len_with_size - 7, 7);
      },
      makeUnknownSize(): void {
        view.setVINT(this.position + this.id_len, -1, this.size_len);
      },
      stepOver(): void {
        skipContent = true;
      }
    };

    position = dataPos;
    if (skipContent || def?.type !== "master") position += size;
  }
}
// Command-line entry point: dump every element of the file given as the first
// argument and, when a second argument is present, write the (possibly
// modified) buffer to that path.
const [inputPath, outputPath] = Deno.args;
if (!inputPath) {
  console.error("Usage: deno run --allow-net --allow-read --allow-write <script> <input> [output]");
  Deno.exit(1);
}
const arr = await Deno.readFile(inputPath);
// The visitor works directly on arr's underlying buffer, so in-place edits made
// through an entry (makeVoid / makeUnknownSize) show up in `arr` when it is written out.
const visitor = createVisitor(arr.buffer);
for (const entry of visitor) {
  if (entry.def)
    console.log(`${entry.def.path} ${entry.def.type}(${entry.size} bytes) ${entry.value != null ? "= " + JSON.stringify(entry.value) : ""}`);
  else
    console.log(`unknown element 0x${entry.id.toString(16)}`);
  // Dispatch on the last word of the element path. Every action is currently
  // commented out; uncomment the ones needed to rewrite the file.
  switch (((entry.def?.path ?? "").match(/\w+$/) ?? [])[0]) {
    case "SeekHead":
    case "Duration":
    case "Cues":
      // entry.makeVoid();
      // entry.stepOver();
      break;
    case "Segment":
      // entry.makeUnknownSize();
      break;
    case "Cluster":
      // entry.stepOver();
      break;
  }
}
if (outputPath) {
  await Deno.writeFile(outputPath, arr);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment