Created
March 18, 2019 16:32
-
-
Save gglnx/86bc6de7cef16e16d8b3a1f8a8235427 to your computer and use it in GitHub Desktop.
Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
'use strict'; | |
const cheerio = require('cheerio'); | |
const child_process = require('child_process'); | |
const fs = require('fs'); | |
const meow = require('meow'); | |
const MP4Box = require('mp4box'); | |
const path = require('path'); | |
const tmp = require('tmp'); | |
const groupBy = require('lodash.groupby'); | |
const xmlbuilder = require('xmlbuilder'); | |
const replaceExt = require('replace-ext'); | |
// Command-line interface: the usage banner below is handed to meow, which
// exposes the positional arguments as `cli.input` and the help text as `cli.help`.
const cli = meow(`
Usage
$ extract-chapters-from-m4a-as-psc <input> [<output>]
Examples
$ extract-chapters-from-m4a-as-psc input.m4a output.psc
🔖 23 chapters extracted to output.psc
`);

// Positional arguments: the media file to read and an optional target file.
const [inputArg, outputArg] = cli.input;

// Without an input file there is nothing to do: print the help and stop.
if (!inputArg) {
  console.log(cli.help);
  process.exit(0);
}

// Absolute path of the M4A/MP4 file to read.
const input = path.resolve(inputArg);

// Absolute path of the PSC file to write; when no output argument is given,
// the input path with its extension replaced by `.psc` is used.
const output = outputArg ? path.resolve(outputArg) : replaceExt(input, '.psc');

// Read the whole media file into memory and hand it to mp4box.js in one piece.
const mp4box = new MP4Box.MP4Box();
const mediaBuffer = new Uint8Array(fs.readFileSync(input)).buffer;

// The buffer covers the file from its first byte, so its offset is 0.
mediaBuffer.fileStart = 0;
mp4box.appendBuffer(mediaBuffer);

// Parsed file description; `info.tracks` is consumed by the chapter extraction.
const info = mp4box.getInfo();
/**
 * Dump one text track to a temporary TTXT file with the external `mp4box`
 * command and parse the `TextSample` elements of that file into chapters.
 *
 * @param {{id: number}} track - Entry of `info.tracks`; only `id` is read here.
 * @returns {Array<{start: string, title: string, href?: string}>} Chapters of
 *   the track with a non-empty title. `href` is only set when the sample
 *   contains a `HyperLink` element carrying a `URL` attribute.
 * @throws {Error} When the `mp4box` command fails or the dump cannot be read.
 */
function extractTrackChapters(track) {
  const tmpFile = tmp.fileSync();

  try {
    // TODO: Replace with JS
    // execFileSync passes the arguments verbatim without going through a
    // shell, so paths containing spaces or shell metacharacters are safe.
    child_process.execFileSync(
      'mp4box',
      ['-ttxt', String(track.id), input, '-out', tmpFile.name],
      { stdio: 'ignore' }
    );

    // Parse the dumped XML
    const $xml = cheerio.load(fs.readFileSync(tmpFile.name, 'utf8'), {
      decodeEntities: true,
      normalizeWhitespace: true,
      xmlMode: true,
    });

    return $xml('TextSample')
      .map((index, ts) => {
        const $ts = $xml(ts);
        const chapter = {
          start: $ts.attr('sampleTime'),
          title: $ts.text().trim(),
        };

        // Only add a link when the HyperLink element really carries a URL;
        // calling trim() on a missing attribute would throw.
        const url = $ts.find('HyperLink').attr('URL');
        if (typeof url === 'string') {
          chapter.href = url.trim();
        }

        return chapter;
      })
      .get()
      // Samples without text are not chapters
      .filter(c => c.title !== '');
  } finally {
    // Remove the temporary file even when mp4box or the parsing failed
    tmpFile.removeCallback();
  }
}

// Chapters of all tx3g tracks in track order, as one flat list
const tx3gChapters = [].concat(
  ...info.tracks
    .filter(t => t.codec === 'tx3g')
    .map(t => extractTrackChapters(t))
);

// Entries sharing the same start time describe the same chapter mark
const chaptersByStart = groupBy(tx3gChapters, c => c.start);

// Merge each group into one fresh object (properties of later entries win),
// leaving the parsed per-track objects untouched
const chapterMarks = Object.keys(chaptersByStart)
  .map(start => Object.assign({}, ...chaptersByStart[start]));
// Build the <psc:chapters> root element of the Podlove Simple Chapters document
const psc = xmlbuilder.create('psc:chapters', { encoding: 'utf-8' });
psc.att('version', '1.1');
psc.att('xmlns:psc', 'http://podlove.org/simple-chapters');

// One <psc:chapter> child per chapter mark; the chapter object is passed as
// the second argument of ele() (presumably applied as attributes — see the
// xmlbuilder documentation)
for (const cm of chapterMarks) {
  psc.ele('psc:chapter', cm);
}

// Serialize the document and write it to the output path
const pscXml = psc.end({ pretty: true });
fs.writeFileSync(output, pscXml);

// Report the result
console.log(`🔖 ${chapterMarks.length} chapters extracted to ${output}`);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "extract-chapters-from-m4a-as-psc",
  "version": "0.1.0",
  "description": "Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters",
  "main": "extract-chapters-from-m4a-as-psc.js",
  "author": "Dennis Morhardt <info@dennismorhardt.de>",
  "license": "MIT",
  "bin": {
    "extract-chapters-from-m4a-as-psc": "extract-chapters-from-m4a-as-psc.js"
  },
  "dependencies": {
    "cheerio": "^0.22.0",
    "lodash.groupby": "^4.6.0",
    "meow": "^3.7.0",
    "mp4box": "^0.3.15",
    "replace-ext": "^1.0.0",
    "tmp": "^0.0.31",
    "xmlbuilder": "^9.0.0"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment