Skip to content

Instantly share code, notes, and snippets.

@polotek
Created July 21, 2010 04:48
Show Gist options
  • Save polotek/484083 to your computer and use it in GitHub Desktop.
Save polotek/484083 to your computer and use it in GitHub Desktop.
/** Testing sax push parsing
*
* The output file should be almost identical
* to the input file.
*
* Known issues with this example output
*
* - Doesn't escape entities. You can do it
* manually on the character content.
* - Doesn't replicate self-closing tags.
* They get an end element instead
**/
var sys = require('sys')
, fs = require('fs')
, libxmljs = require('libxmljs');
// Script-wide state shared by the handlers and stream callbacks below:
//   d       - timestamp taken at startup, used to report elapsed time
//   infile  - read stream over the XML document being parsed
//   outfile - write stream receiving the re-serialized document
var d = new Date()
  // input xml file
  , infile = fs.createReadStream('input.xml', {encoding: 'utf8'})
  // output xml file
  , outfile = fs.createWriteStream('output.xml', {encoding: 'utf8'});
/**
 * SAX event handlers that re-serialize the parsed document to `outfile`.
 *
 * Each key is the name of a registration method on the callback object
 * handed to the SaxPushParser constructor; the matching function is the
 * handler passed to that method.
 *
 * NOTE: text, attribute values, CDATA and comments are written verbatim,
 * without entity escaping (listed as a known issue in the file header).
 */
var funcs = {
  /** Logs the start of parsing and writes the XML declaration. */
  onStartDocument: function() {
    console.log('starting...');
    outfile.write('<?xml version="1.0" encoding="utf-8"?>\n');
  },

  /** Closes the output stream once the parser reports end of document. */
  onEndDocument: function() {
    outfile.end();
    console.log('done.');
  },

  /**
   * Writes an opening tag with its attributes and namespace declarations.
   *
   * @param {string} elem - element name, written after the optional prefix
   * @param {Array} attrs - entries read as [0]=name, [1]=prefix, [3]=value
   *   ([2] is not used when serializing)
   * @param {?string} prefix - element prefix; omitted from the tag when falsy
   * @param {?string} uri - not used when serializing
   * @param {Array} namespaces - entries read as [0]=prefix, [1]=uri
   */
  onStartElementNS: function(elem, attrs, prefix, uri, namespaces) {
    var out = ['<' + (prefix ? prefix + ':' : '') + elem]
      , i, len, attr, ns;

    for (i = 0, len = attrs.length; i < len; i++) {
      attr = attrs[i];
      out.push(' ' + (attr[1] ? attr[1] + ':' : '')
        + attr[0] + '="' + (attr[3] || '') + '"');
    }

    for (i = 0, len = namespaces.length; i < len; i++) {
      ns = namespaces[i];
      // A declaration without a prefix is the default namespace and must be
      // written as xmlns="...", not xmlns:null="..." / xmlns:undefined="...".
      out.push(' xmlns' + (ns[0] ? ':' + ns[0] : '')
        + '="' + (ns[1] || '') + '"');
    }

    out.push('>');
    outfile.write(out.join(''));
  },

  /**
   * Writes the closing tag for an element.
   *
   * @param {string} elem - element name
   * @param {?string} prefix - element prefix; omitted from the tag when falsy
   * @param {?string} uri - not used when serializing
   */
  onEndElementNS: function(elem, prefix, uri) {
    outfile.write('</' + (prefix ? prefix + ':' : '') + elem + '>');
  },

  /** Writes character data as received (nothing is escaped). */
  onCharacters: function(chars) {
    outfile.write(chars || '');
  },

  /** Wraps the received text in a CDATA section. */
  onCdata: function(chars) {
    outfile.write('<![CDATA[');
    outfile.write(chars || '');
    outfile.write(']]>');
  },

  /** Wraps the received text in an XML comment. */
  onComment: function(chars) {
    outfile.write('<!--');
    outfile.write(chars || '');
    outfile.write('-->');
  },

  /** Forwards parser warnings to the console. */
  onWarning: function(warning) {
    console.warn(warning);
  },

  /** Forwards parser errors to the console with an 'ERROR: ' prefix. */
  onError: function(error) {
    console.error('ERROR: ' + error);
  }
};
// Create the push parser and register every handler from `funcs`.
// For each key in `funcs`, the method of the same name on `cb` is called
// with the corresponding handler function.
var p = new libxmljs.SaxPushParser(function (cb) {
  Object.keys(funcs).forEach(function (eventName) {
    cb[eventName](funcs[eventName]);
  });
});
// Hand each non-empty chunk read from the input file to the push parser.
infile.on('data', function(chunk) {
  if (!chunk) {
    return;
  }
  p.push(chunk);
});

// When the input stream has been fully read, report the time elapsed
// since the script started (`d`).
infile.on('end', function() {
  var elapsedMs = new Date() - d;
  console.log('TIME: ' + elapsedMs + ' ms');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment