/*
A quick reformatting tool for Eidos Methode XML files. Use it like so:

    node index.js -i input-file.xml -o output-file.md
*/
const minimist = require("minimist");
const sax = require("sax");
const fs = require("fs");

// Command-line options: -i <input XML file>, -o <output file>.
// The first two argv entries (node binary and script path) are not options.
const args = minimist(process.argv.slice(2));

// Fail early with a readable message instead of an opaque TypeError from fs.
if (args.i == null || args.o == null) {
  console.error("Usage: node index.js -i input-file.xml -o output-file.md");
  process.exit(1);
}

// The input file is read as a stream and piped into the parser further down.
const input = fs.createReadStream(args.i);

// createStream() is called without arguments, so sax's strict flag is left
// unset. NOTE(review): the handlers lower-case tag and attribute names,
// presumably because non-strict sax normalizes their case -- confirm.
const parser = sax.createStream();
/**
 * A node of the in-memory document tree built from the parser events.
 *
 * Fields:
 *   type       - "" by default; other code in this file sets "document",
 *                "element" or "text".
 *   tag        - tag name for elements, "" otherwise.
 *   contents   - text carried by the node itself.
 *   children   - child nodes, in document order.
 *   attributes - attribute name -> value map.
 *   parent     - owning node, or null when detached / at the root.
 */
class Node {
  constructor() {
    this.type = "";
    this.tag = "";
    this.contents = "";
    this.children = [];
    this.attributes = {};
    this.parent = null;
  }

  /**
   * Appends `child` to this node and points its `parent` back here.
   * @param {Node} child
   */
  addChild(child) {
    child.parent = this;
    this.children.push(child);
  }

  /**
   * Detaches `child` from this node and clears its `parent`.
   *
   * A new `children` array is built instead of splicing the existing one, so
   * a loop that is still iterating the previous array (for example a
   * traversal that removes nodes as it goes) is not disturbed.
   * @param {Node} child
   */
  removeChild(child) {
    child.parent = null;
    this.children = this.children.filter(n => n !== child);
  }
}
// Root of the parsed document; nodes created by the parser handlers are
// attached beneath it.
const tree = new Node();
tree.type = "document";

// Node that newly parsed children are attached to. The tag handlers move it
// down when an element opens and back up when it closes.
let current = tree;
// Report parse errors on stderr; the handler does not abort the run.
parser.on("error", e => console.error("ERROR", e));
// An element opened: create its node under `current` and descend into it.
// Tag and attribute names are lower-cased so the rest of the file can match
// them against lower-case literals.
parser.on("opentag", function(node) {
  const element = new Node();
  element.type = "element";
  element.tag = node.name.toLowerCase();
  Object.keys(node.attributes).forEach(function(name) {
    element.attributes[name.toLowerCase()] = node.attributes[name];
  });
  current.addChild(element);
  current = element;
});
// An element closed: climb back to its parent. If there is no parent (we are
// already at the root), stay put so later events still have a node to attach to.
parser.on("closetag", function() {
  if (current.parent) current = current.parent;
});
// Character data: attach it to `current` as a text node. Runs of line breaks
// become one space, then runs of two or more spaces collapse to one.
parser.on("text", function(text) {
  const textNode = new Node();
  textNode.type = "text";
  textNode.contents = text.replace(/[\n\r]+/g, " ").replace(/ {2,}/g, " ");
  current.addChild(textNode);
});
// Default `exit` callback for walk(): does nothing.
const noop = function() {};

/**
 * Depth-first traversal of a tree of nodes.
 *
 * @param {object} root - node to start from; a node's `children`, if present,
 *   is an array of further nodes.
 * @param {function} enter - called with each node before its children. If it
 *   returns exactly `false`, that node's children are skipped and `exit` is
 *   not called for it.
 * @param {function} [exit] - called with each node after its children.
 */
const walk = function(root, enter, exit = noop) {
  const visit = function(node) {
    if (enter(node) === false) return;
    // Read `children` only after enter() has run, because enter() may have
    // replaced or emptied it.
    const children = node.children;
    if (children && children.length) children.forEach(child => visit(child));
    exit(node);
  };
  visit(root);
};
/**
 * Collects every node in the document tree whose `tag` equals `tagname`.
 *
 * @param {string} tagname - tag to look for (tags are stored lower-cased).
 * @returns {Array} matching nodes, in the order the traversal reaches them.
 */
const $ = function(tagname) {
  const found = [];
  walk(tree, function(node) {
    // Block body on purpose: returning `false` here would stop walk() from
    // descending into this node's children.
    if (node.tag === tagname) found.push(node);
  });
  return found;
};
// Parsing finished: prune the tree, render it to Markdown-style text and
// write the result to the output file.
parser.on("end", function() {
  // Tags whose whole subtree is collapsed into one string of text.
  const flatten = ["headline", "subhead", "a"];

  // True for nodes that must not reach the output: anything with
  // channel="!", elements without children, and <annotation> elements.
  const isCulled = node =>
    node.attributes.channel === "!" ||
    (node.type === "element" && node.children.length === 0) ||
    node.tag === "annotation";

  // Concatenates the contents of all descendants of `node`. Subtrees that
  // would be culled are skipped, so their text cannot leak into a flattened
  // headline, subhead or link.
  const text = function(node) {
    let collected = "";
    node.children.forEach(child => walk(child, function(n) {
      if (isCulled(n)) return false;
      collected += n.contents;
    }));
    return collected;
  };

  // walk() callback: removes culled nodes (returning false so their children
  // are not visited) and flattens the tags listed above.
  const cull = function(node) {
    if (isCulled(node)) {
      node.parent.removeChild(node);
      return false;
    }
    if (flatten.includes(node.tag)) {
      node.contents = text(node);
      node.children = [];
    }
  };

  // Per-tag renderers; `any` is the fallback for every other node.
  const formatters = {
    p: node => `\n\n${node.contents}`,
    headline: node => `\n\n# ${node.contents}`,
    subhead: node => `\n\n## ${node.contents}`,
    // A link without an href is emitted as plain text rather than
    // "[text](undefined)".
    a: node => node.attributes.href == null
      ? node.contents
      : `[${node.contents}](${node.attributes.href})`,
    any: node => node.contents
  };

  // Own-property lookup only, so a tag such as "constructor" cannot resolve
  // to something inherited from Object.prototype.
  const formatterFor = node =>
    Object.prototype.hasOwnProperty.call(formatters, node.tag)
      ? formatters[node.tag]
      : formatters.any;

  let out = "";
  const enter = node => {
    out += formatterFor(node)(node);
  };

  walk(tree, cull);
  walk(tree, enter);

  // Trim spaces at the start and end of every line, then collapse any
  // remaining runs of spaces.
  out = out.replace(/^ +| +$/gm, "").replace(/ {2,}/g, " ");
  fs.writeFileSync(args.o, out);
});
// Start streaming the input file into the parser; the handlers registered
// above build the tree and the "end" handler writes the output.
input.pipe(parser);