Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters
#!/usr/bin/env node
'use strict';
const cheerio = require('cheerio');
const child_process = require('child_process');
const fs = require('fs');
const meow = require('meow');
const MP4Box = require('mp4box');
const path = require('path');
const tmp = require('tmp');
const groupBy = require('lodash.groupby');
const xmlbuilder = require('xmlbuilder');
const replaceExt = require('replace-ext');
// Set up the command-line helper; the template literal is the help text
// handed to meow and later printed via `cli.help`.
const cli = meow(`
Usage
$ extract-chapters-from-m4a-as-psc <input> [<output>]
Examples
$ extract-chapters-from-m4a-as-psc input.m4a output.psc
🔖 23 chapters extracted to output.psc
`);

// Positional arguments: <input> is required, <output> is optional.
const inputArg = cli.input[0];
const outputArg = cli.input[1];

// Without an input there is nothing to do: print the usage text and quit
// with exit code 0.
if (!inputArg) {
  console.log(cli.help);
  process.exit(0);
}

// Resolved path of the file to read.
const input = path.resolve(inputArg);

// Resolved path of the file to write. When no output was given, reuse the
// input path with its extension replaced by `.psc`.
const output = outputArg
  ? path.resolve(outputArg)
  : replaceExt(input, '.psc');
// Read the whole input file and feed it to MP4Box in one piece.
const mp4box = new MP4Box.MP4Box();
const file = fs.readFileSync(input);

// Copy exactly the bytes of `file` into a standalone ArrayBuffer, which is
// the input type passed to `appendBuffer` below.
const arrayBuffer = file.buffer.slice(
  file.byteOffset,
  file.byteOffset + file.byteLength
);

// Tag the buffer with its offset inside the file (0: it is the entire file)
// before appending it.
arrayBuffer.fileStart = 0;
mp4box.appendBuffer(arrayBuffer);

// File/track information; `info.tracks` is consumed further down.
const info = mp4box.getInfo();
/**
 * Dump one text track to XML with the external `mp4box -ttxt` command and
 * turn its `TextSample` elements into chapter objects.
 *
 * @param {Object} track - Entry of `info.tracks`; only `track.id` is read.
 * @returns {Object[]} One object per `TextSample` with a non-empty text:
 *   `start` (the `sampleTime` attribute), `title` (trimmed text content) and,
 *   when a `HyperLink` child carries a `URL` attribute, `href`.
 * @throws {Error} If `mp4box` cannot be run or exits with an error, or the
 *   dumped file cannot be read.
 */
function extractTrackChapters(track) {
  // Get temp file
  const tmpFile = tmp.fileSync();
  try {
    // Run mp4box
    // TODO: Replace with JS
    // The arguments go in as an array and no shell is involved, so paths
    // containing spaces or shell metacharacters reach mp4box unchanged.
    child_process.execFileSync(
      'mp4box',
      ['-ttxt', String(track.id), input, '-out', tmpFile.name],
      { stdio: 'ignore' }
    );

    // Extract XML from tmp file
    const xmlFile = fs.readFileSync(tmpFile.name);
    const $xml = cheerio.load(xmlFile.toString('utf8'), {
      decodeEntities: true,
      normalizeWhitespace: true,
      xmlMode: true,
    });

    return $xml('TextSample')
      .map((index, ts) => {
        const $ts = $xml(ts);
        const chapter = {
          start: $ts.attr('sampleTime'),
          title: $ts.text().trim(),
        };
        // Only attach a link when the URL attribute is actually present;
        // a HyperLink element without one is ignored instead of throwing.
        const url = $ts.find('HyperLink').attr('URL');
        if (typeof url === 'string') {
          chapter.href = url.trim();
        }
        return chapter;
      })
      .get()
      // Samples without text are dropped.
      .filter(c => c.title !== '');
  } finally {
    // Remove the tmp file even when mp4box or the parsing step failed.
    tmpFile.removeCallback();
  }
}

/**
 * Merge chapter objects that share the same `start` value into one object.
 *
 * @param {Object[]} chapters - Chapter objects, in priority order: when two
 *   objects with the same `start` define the same property, the later wins.
 * @returns {Object[]} One freshly created object per distinct `start`; the
 *   input objects are left unmodified.
 */
function mergeChaptersByStart(chapters) {
  const groupedByTime = groupBy(chapters, c => c.start);
  return Object.keys(groupedByTime).map(start =>
    Object.assign({}, ...groupedByTime[start])
  );
}

// Extract the chapters of every tx3g track, flatten them into a single list
// and merge the entries that start at the same time.
const chapterMarks = mergeChaptersByStart(
  [].concat(
    ...info.tracks
      // Filter by type
      .filter(t => t.codec === 'tx3g')
      // Extract chapters from the dumped XML
      .map(t => extractTrackChapters(t))
  )
);
// Root element of the Podlove Simple Chapters document, carrying the
// version and the `psc` namespace declaration.
const psc = xmlbuilder
  .create('psc:chapters', { encoding: 'utf-8' })
  .att('version', '1.1')
  .att('xmlns:psc', 'http://podlove.org/simple-chapters');

// One <psc:chapter> per chapter mark; the chapter object is passed as the
// element's attributes.
for (const cm of chapterMarks) {
  psc.ele('psc:chapter', cm).up();
}

// Serialize with pretty-printing and write the result to the output path.
const pscXml = psc.end({ pretty: true });
fs.writeFileSync(output, pscXml);

// Report how many chapters were written and where.
console.log(`🔖 ${chapterMarks.length} chapters extracted to ${output}`);
{
"name": "extract-chapters-from-m4a-as-psc",
"version": "0.1.0",
"description": "Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters",
"main": "extract-chapters-from-m4a-as-psc.js",
"author": "Dennis Morhardt <info@dennismorhardt.de>",
"license": "MIT",
"bin": {
"extract-chapters-from-m4a-as-psc": "extract-chapters-from-m4a-as-psc.js"
},
"dependencies": {
"cheerio": "^0.22.0",
"lodash.groupby": "^4.6.0",
"meow": "^3.7.0",
"mp4box": "^0.3.15",
"replace-ext": "^1.0.0",
"tmp": "^0.0.31",
"xmlbuilder": "^9.0.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment