gglnx/extract-chapters-from-m4a-as-psc.js

## extract-chapters-from-m4a-as-psc.js
#!/usr/bin/env node
'use strict';
const cheerio = require('cheerio');
const child_process = require('child_process');
const fs = require('fs');
const meow = require('meow');
const MP4Box = require('mp4box');
const path = require('path');
const tmp = require('tmp');
const groupBy = require('lodash.groupby');
const xmlbuilder = require('xmlbuilder');
const replaceExt = require('replace-ext');

// Set up the command-line interface; the template string is the help text
const cli = meow(`
  Usage
    $ extract-chapters-from-m4a-as-psc <input> [<output>]

  Examples
    $ extract-chapters-from-m4a-as-psc input.m4a output.psc
    🔖 23 chapters extracted to output.psc
`);

// Positional arguments: the media file to read and, optionally, the file to write
const [inputArg, outputArg] = cli.input;

// Without an input file there is nothing to do: print the usage text and stop
if (!inputArg) {
  console.log(cli.help);
  process.exit(0);
}

// Absolute path of the m4a/mp4 file to read
const input = path.resolve(inputArg);

// Absolute path of the PSC file to write; when no output argument is given,
// the input path is reused with its extension replaced by `.psc`
const output = outputArg ? path.resolve(outputArg) : replaceExt(input, '.psc');

// Parser instance that will receive the file contents
const mp4box = new MP4Box.MP4Box();

// Read the complete file into memory
const fileContents = fs.readFileSync(input);

// Copy the bytes into a new typed array; its `.buffer` is then an ArrayBuffer
// that holds exactly the file contents
const mediaBuffer = new Uint8Array(fileContents).buffer;

// NOTE(review): `fileStart` presumably tells mp4box where this buffer sits
// within the file — confirm against the mp4box documentation
mediaBuffer.fileStart = 0;
mp4box.appendBuffer(mediaBuffer);

// Parsed file information; its `tracks` list is used to find chapter tracks
const info = mp4box.getInfo();

/**
 * Export one text track as TTXT XML with the external `mp4box` tool and read
 * the chapter marks out of the generated file.
 *
 * @param {number|string} trackId - ID of the track to export
 * @returns {Object[]} one `{ start, title[, href] }` object per `TextSample`
 *   element; samples whose text is empty are left out
 * @throws {Error} when `mp4box` cannot be run or exits with a failure, or when
 *   the exported file cannot be read
 */
function extractTrackChapters(trackId) {
  // Temporary file that receives the exported XML
  const tmpFile = tmp.fileSync();

  try {
    // Run mp4box
    // TODO: Replace with JS
    // The arguments are passed as an array (no shell involved), so paths with
    // spaces or shell metacharacters reach mp4box unchanged
    child_process.execFileSync(
      'mp4box',
      ['-ttxt', String(trackId), input, '-out', tmpFile.name],
      { stdio: 'ignore' }
    );

    // Parse the exported XML
    const $xml = cheerio.load(fs.readFileSync(tmpFile.name, 'utf8'), {
      decodeEntities: true,
      normalizeWhitespace: true,
      xmlMode: true,
    });

    // Turn every text sample into a chapter object
    return $xml('TextSample').map((index, ts) => {
      const $ts = $xml(ts);
      const chapter = {
        start: $ts.attr('sampleTime'),
        title: $ts.text().trim(),
      };

      // Add the link only when the sample has a HyperLink element that
      // actually carries a URL attribute
      const url = $ts.find('HyperLink').attr('URL');
      if (url !== undefined) {
        chapter.href = url.trim();
      }

      return chapter;
    }).get().filter(c => c.title !== '');
  } finally {
    // Remove the temporary file even when the export or the parsing failed
    tmpFile.removeCallback();
  }
}

/**
 * Combine the chapter lists of several tracks into one list in which chapters
 * that share the same `start` value are merged into a single object.
 *
 * @param {Object[][]} chapterLists - one chapter array per track
 * @returns {Object[]} merged chapters; when merged chapters define the same
 *   key, the value from the later chapter wins
 */
function mergeChaptersByStart(chapterLists) {
  const allChapters = [].concat(...chapterLists);
  const groupedByTime = groupBy(allChapters, c => c.start);

  // Merge into a fresh object so none of the parsed chapters is mutated
  return Object.keys(groupedByTime)
    .map(start => Object.assign({}, ...groupedByTime[start]));
}

// Extract the chapters of all tx3g tracks and reduce them to one chapter list
const chapterMarks = mergeChaptersByStart(
  info.tracks
    .filter(t => t.codec === 'tx3g')
    .map(t => extractTrackChapters(t.id))
);

// Root element of the Podlove Simple Chapters document
const psc = xmlbuilder.create('psc:chapters', { encoding: 'utf-8' });
psc.att('version', '1.1');
psc.att('xmlns:psc', 'http://podlove.org/simple-chapters');

// One <psc:chapter> element per chapter mark; the chapter object is passed as
// the element's attributes
for (const chapterMark of chapterMarks) {
  psc.ele('psc:chapter', chapterMark);
}

// Serialize the document with pretty-printing and write it to the output path
const pscXml = psc.end({ pretty: true });
fs.writeFileSync(output, pscXml);

// Report how many chapters were written and where
console.log(`🔖 ${chapterMarks.length} chapters extracted to ${output}`);

## package.json
{
  "name": "extract-chapters-from-m4a-as-psc",
  "version": "0.1.0",
  "description": "Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters",
  "main": "extract-chapters-from-m4a-as-psc.js",
  "author": "Dennis Morhardt <info@dennismorhardt.de>",
  "license": "MIT",
  "bin": {
    "extract-chapters-from-m4a-as-psc": "extract-chapters-from-m4a-as-psc.js"
  },
  "dependencies": {
    "cheerio": "^0.22.0",
    "lodash.groupby": "^4.6.0",
    "meow": "^3.7.0",
    "mp4box": "^0.3.15",
    "replace-ext": "^1.0.0",
    "tmp": "^0.0.31",
    "xmlbuilder": "^9.0.0"
  }
}
	#!/usr/bin/env node
	'use strict';
	const cheerio = require('cheerio');
	const child_process = require('child_process');
	const fs = require('fs');
	const meow = require('meow');
	const MP4Box = require('mp4box');
	const path = require('path');
	const tmp = require('tmp');
	const groupBy = require('lodash.groupby');
	const xmlbuilder = require('xmlbuilder');
	const replaceExt = require('replace-ext');

	// Set up the command-line interface; the template string is the help text
	const cli = meow(`
	Usage
	$ extract-chapters-from-m4a-as-psc <input> [<output>]

	Examples
	$ extract-chapters-from-m4a-as-psc input.m4a output.psc
	🔖 23 chapters extracted to output.psc
	`);

	// Positional arguments: the media file to read and, optionally, the file to write
	const [inputArg, outputArg] = cli.input;

	// Without an input file there is nothing to do: print the usage text and stop
	if (!inputArg) {
	  console.log(cli.help);
	  process.exit(0);
	}

	// Absolute path of the m4a/mp4 file to read
	const input = path.resolve(inputArg);

	// Absolute path of the PSC file to write; when no output argument is given,
	// the input path is reused with its extension replaced by `.psc`
	const output = outputArg ? path.resolve(outputArg) : replaceExt(input, '.psc');

	// Parser instance that will receive the file contents
	const mp4box = new MP4Box.MP4Box();

	// Read the complete file into memory
	const fileContents = fs.readFileSync(input);

	// Copy the bytes into a new typed array; its `.buffer` is then an ArrayBuffer
	// that holds exactly the file contents
	const mediaBuffer = new Uint8Array(fileContents).buffer;

	// NOTE(review): `fileStart` presumably tells mp4box where this buffer sits
	// within the file — confirm against the mp4box documentation
	mediaBuffer.fileStart = 0;
	mp4box.appendBuffer(mediaBuffer);

	// Parsed file information; its `tracks` list is used to find chapter tracks
	const info = mp4box.getInfo();

	/**
	 * Export one text track as TTXT XML with the external `mp4box` tool and read
	 * the chapter marks out of the generated file.
	 *
	 * @param {number|string} trackId - ID of the track to export
	 * @returns {Object[]} one `{ start, title[, href] }` object per `TextSample`
	 *   element; samples whose text is empty are left out
	 * @throws {Error} when `mp4box` cannot be run or exits with a failure, or when
	 *   the exported file cannot be read
	 */
	function extractTrackChapters(trackId) {
	  // Temporary file that receives the exported XML
	  const tmpFile = tmp.fileSync();

	  try {
	    // Run mp4box
	    // TODO: Replace with JS
	    // The arguments are passed as an array (no shell involved), so paths with
	    // spaces or shell metacharacters reach mp4box unchanged
	    child_process.execFileSync(
	      'mp4box',
	      ['-ttxt', String(trackId), input, '-out', tmpFile.name],
	      { stdio: 'ignore' }
	    );

	    // Parse the exported XML
	    const $xml = cheerio.load(fs.readFileSync(tmpFile.name, 'utf8'), {
	      decodeEntities: true,
	      normalizeWhitespace: true,
	      xmlMode: true,
	    });

	    // Turn every text sample into a chapter object
	    return $xml('TextSample').map((index, ts) => {
	      const $ts = $xml(ts);
	      const chapter = {
	        start: $ts.attr('sampleTime'),
	        title: $ts.text().trim(),
	      };

	      // Add the link only when the sample has a HyperLink element that
	      // actually carries a URL attribute
	      const url = $ts.find('HyperLink').attr('URL');
	      if (url !== undefined) {
	        chapter.href = url.trim();
	      }

	      return chapter;
	    }).get().filter(c => c.title !== '');
	  } finally {
	    // Remove the temporary file even when the export or the parsing failed
	    tmpFile.removeCallback();
	  }
	}

	/**
	 * Combine the chapter lists of several tracks into one list in which chapters
	 * that share the same `start` value are merged into a single object.
	 *
	 * @param {Object[][]} chapterLists - one chapter array per track
	 * @returns {Object[]} merged chapters; when merged chapters define the same
	 *   key, the value from the later chapter wins
	 */
	function mergeChaptersByStart(chapterLists) {
	  const allChapters = [].concat(...chapterLists);
	  const groupedByTime = groupBy(allChapters, c => c.start);

	  // Merge into a fresh object so none of the parsed chapters is mutated
	  return Object.keys(groupedByTime)
	    .map(start => Object.assign({}, ...groupedByTime[start]));
	}

	// Extract the chapters of all tx3g tracks and reduce them to one chapter list
	const chapterMarks = mergeChaptersByStart(
	  info.tracks
	    .filter(t => t.codec === 'tx3g')
	    .map(t => extractTrackChapters(t.id))
	);

	// Root element of the Podlove Simple Chapters document
	const psc = xmlbuilder.create('psc:chapters', { encoding: 'utf-8' });
	psc.att('version', '1.1');
	psc.att('xmlns:psc', 'http://podlove.org/simple-chapters');

	// One <psc:chapter> element per chapter mark; the chapter object is passed as
	// the element's attributes
	for (const chapterMark of chapterMarks) {
	  psc.ele('psc:chapter', chapterMark);
	}

	// Serialize the document with pretty-printing and write it to the output path
	const pscXml = psc.end({ pretty: true });
	fs.writeFileSync(output, pscXml);

	// Report how many chapters were written and where
	console.log(`🔖 ${chapterMarks.length} chapters extracted to ${output}`);
	{
	"name": "extract-chapters-from-m4a-as-psc",
	"version": "0.1.0",
	"description": "Extract chapter marks from M4A/MP4 files as Podlove Simple Chapters",
	"main": "extract-chapters-from-m4a-as-psc.js",
	"author": "Dennis Morhardt <info@dennismorhardt.de>",
	"license": "MIT",
	"bin": {
	"extract-chapters-from-m4a-as-psc": "extract-chapters-from-m4a-as-psc.js"
	},
	"dependencies": {
	"cheerio": "^0.22.0",
	"lodash.groupby": "^4.6.0",
	"meow": "^3.7.0",
	"mp4box": "^0.3.15",
	"replace-ext": "^1.0.0",
	"tmp": "^0.0.31",
	"xmlbuilder": "^9.0.0"
	}
	}