Skip to content

Instantly share code, notes, and snippets.

@divergentdave
Last active November 12, 2015 11:28
Show Gist options
  • Save divergentdave/df9d3dcebe1f571a1d11 to your computer and use it in GitHub Desktop.
Save divergentdave/df9d3dcebe1f571a1d11 to your computer and use it in GitHub Desktop.
eCFR test harness for citation
#!/usr/bin/env node
var Citation = require('../citation');
var fs = require('fs');
var sax = require('sax');
var unzip = require('unzip2');
/**
 * Create a sax stream that checks section numbers found in an XML document
 * against the citation extractor.
 *
 * While outside any CONTENTS element, the text of each TITLENUM element is
 * remembered (with the first "Title " removed) and the text of each SECTNO
 * element is combined with it into "<title> CFR <section>" and handed to
 * Citation.find(). Nothing is printed when exactly one citation comes back
 * and its `match` equals the input; every other outcome is logged.
 *
 * @returns {Object} the sax stream, ready to have XML piped into it.
 */
function parserFactory() {
  // First argument `true` selects sax's strict mode.
  var xml = sax.createStream(true, {trim: true});
  // How many CONTENTS elements are currently open.
  var contentsDepth = 0;
  // Text of the most recent TITLENUM; stays undefined until one is seen.
  var titleNumber;

  // Adjust the CONTENTS nesting depth when the given tag is a CONTENTS tag.
  function adjustDepth(tagName, delta) {
    if (tagName === 'CONTENTS') {
      contentsDepth += delta;
    }
  }

  // Run one section number through the extractor and report any problem.
  function checkSection(sectionText) {
    var candidate = titleNumber + ' CFR ' + sectionText;
    var found = Citation.find(candidate).citations;
    switch (found.length) {
      case 0:
        console.log("Did not parse " + candidate);
        break;
      case 1:
        // A single citation is only correct if it covers the whole input.
        if (candidate !== found[0].match) {
          console.log("Incorrect match found for " + candidate);
        }
        break;
      default:
        console.log("More than one match found for " + candidate);
        console.log(found);
    }
  }

  xml.on('opentag', function(node) {
    adjustDepth(node.name, 1);
  });

  xml.on('closetag', function(name) {
    adjustDepth(name, -1);
  });

  xml.on('text', function(text) {
    if (contentsDepth !== 0) {
      return;
    }
    // The tag this text is attributed to is read from the stream's
    // underlying parser object.
    // NOTE(review): `_parser` looks like an internal sax property - confirm
    // it is still present before upgrading the sax dependency.
    var currentTag = xml._parser.tag.name;
    if (currentTag === 'SECTNO') {
      checkSection(text);
    } else if (currentTag === 'TITLENUM') {
      titleNumber = text.replace('Title ', '');
    }
  });

  return xml;
}
// Command-line driver: every argument is treated as the path of a zip
// archive; each file entry inside it is streamed through a fresh parser
// from parserFactory().
if (process.argv.length <= 2) {
  console.error("Usage: Download a bulk data zip file from http://www.gpo.gov/fdsys/bulkdata/CFR, then pass its name as a command line argument");
  // Make the missing-argument case visible to the calling shell instead of
  // exiting with a success status.
  process.exitCode = 1;
}
process.argv.slice(2).forEach(function(path) {
  // Without an 'error' listener a missing or corrupt archive would throw an
  // unhandled 'error' event and abort the remaining arguments. pipe() does
  // not forward errors, so the same handler is attached to both streams.
  function reportFailure(err) {
    console.error("Error while reading " + path + ": " + err.message);
    process.exitCode = 1;
  }

  fs.createReadStream(path)
    .on('error', reportFailure)
    .pipe(unzip.Parse())
    .on('error', reportFailure)
    .on('entry', function(entry) {
      if (entry.type === 'File') {
        // Print the entry name so the parser's messages below it can be
        // attributed to a file.
        console.log(entry.path);
        entry.pipe(parserFactory());
      } else {
        // Non-file entries must still be consumed so the archive stream
        // keeps flowing.
        entry.autodrain();
      }
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment