Skip to content

Instantly share code, notes, and snippets.

@cbogart
Created October 16, 2015 14:39
Show Gist options
  • Save cbogart/6924523d4620dbd43068 to your computer and use it in GitHub Desktop.
Save cbogart/6924523d4620dbd43068 to your computer and use it in GitHub Desktop.
Scans a list of gzipped JSON files, looking for entries that match some filter.
var fs = require("fs");
var path = require("path");
var Readable = require("stream").Readable;
var zlib = require("zlib");
var combine = require("combine-streams");
var jsonstream = require("json-stream");
/**
 * Open a compressed file and return a stream of its decompressed bytes.
 *
 * @param {string} fname - Path of the compressed file to read.
 * @returns {stream.Transform} The zlib unzip stream fed by the file. Errors
 *   raised while reading the file (for example ENOENT) are re-raised on this
 *   stream, so one 'error' listener on the return value covers both stages.
 */
var scangz = function (fname) {
  var rawread = fs.createReadStream(fname);
  var unzipped = rawread.pipe(zlib.createUnzip());

  // pipe() does not forward source errors to its destination. Without this
  // hand-off a read failure would fire on a stream the caller never sees and
  // surface as an unhandled 'error' event.
  rawread.on('error', function (err) {
    unzipped.destroy(err);
  });

  return unzipped;
};
/**
 * Scan the gzipped JSON files of a directory and stream every parsed entry
 * that the filter accepts.
 *
 * Only files whose name ends in ".json.gz" and contains `dateprefix` are
 * scanned. All selected files are opened at once, so entries from different
 * files may interleave in the output.
 *
 * @param {string} cachedir - Directory that holds the files; a trailing path
 *   separator is optional.
 * @param {string} dateprefix - Substring a file name must contain to be scanned.
 * @param {function(*): boolean} filter - Called with each parsed entry; a
 *   truthy result keeps the entry.
 * @returns {stream.Readable} Object-mode stream of the accepted entries. It
 *   ends once every selected file has been fully read or has failed, and it
 *   emits 'error' if the directory cannot be listed.
 */
module.exports.scanner = function (cachedir, dateprefix, filter) {
  var results = new Readable({ objectMode: true });
  // Entries are pushed as soon as they are parsed, so there is nothing to do
  // on demand. push()'s return value is ignored: matches are buffered without
  // backpressure.
  results._read = function noop() {};

  var ended = false;
  function endResults() {
    if (ended) {
      return;
    }
    ended = true;
    results.push(null);
  }

  fs.readdir(cachedir, function (err, files) {
    if (err) {
      // Throwing from this callback could not be caught by the caller; report
      // the failure on the stream the caller actually holds instead.
      results.destroy(err);
      return;
    }

    var selected = files.filter(function (file) {
      return file.endsWith(".json.gz") && file.indexOf(dateprefix) !== -1;
    });
    var pending = selected.length;
    if (pending === 0) {
      endResults();
      return;
    }

    selected.forEach(function (file) {
      var settled = false;
      // Count each file exactly once, whichever of end/error arrives first.
      function settle() {
        if (settled) {
          return;
        }
        settled = true;
        pending -= 1;
        if (pending === 0) {
          endResults();
        }
      }

      console.log("Scanning:", file);
      var scanner = scangz(path.join(cachedir, file));
      scanner.on('end', function () { console.log("Done with", file); });
      scanner.on('error', function (err) {
        // A corrupt or unreadable archive is reported like a parse error and
        // skipped rather than crashing the whole scan.
        console.log(err);
        console.log(err.stack);
        settle();
      });

      var scannerp = scanner.pipe(jsonstream());
      scannerp.on('error', function (err) {
        console.log(err);
        console.log(err.stack);
        // pipe() detaches the source once its destination has errored, so no
        // further entries will arrive from this file: stop decompressing it
        // and count it as finished.
        scanner.destroy();
        settle();
      });
      scannerp.on('data', function (chunk) {
        // Guard against a late entry arriving after the output was ended.
        if (!ended && filter(chunk)) {
          results.push(chunk);
        }
      });
      scannerp.on('end', settle);
    });
  });

  return results;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment