Last active
May 29, 2021 18:58
-
-
Save brandonmp/fd56f1c27aaa5234f431bdd65fdfa96b to your computer and use it in GitHub Desktop.
When you have a JSON file too big for memory, here's a way to stream out the specific keys you want.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This is a sort of filter function that returns every value matching a provided path.
If you know the exact path of the node you want, use the function in the other file -- it'll be faster. */
var JSONStream = require('JSONStream') | |
var fs = require('fs') | |
// use immutable for easier assigning of deeply nested props | |
var Immutable = require('immutable') | |
var writeJSON = require('simple-json-writing-util.js') | |
// Accumulates every matched value at the path where it was found.
// Immutable maps are never modified in place, so the binding is reassigned on each update.
let M = Immutable.Map({})
// Pass to the callback any value whose path matches '/true/images', where `true` is a wildcard.
// Check the JSONStream docs, but regexes should also work as path elements.
const stream = JSONStream.parse([true, 'images', { emitKey: true }], (value, fullPath) => {
  // fullPath is the list of keys leading to `value` (e.g. [wildcardMatch, 'images', ...]),
  // which is the same shape Immutable's `setIn` expects for a key path.
  // The callback returns nothing: results are collected in `M` rather than re-emitted.
  M = M.setIn(fullPath, value)
})
const readStream = fs.createReadStream('./filtered.json')
readStream.pipe(stream)
// Write the result when the *parser* ends, not when the file read ends: the parser is
// the stage that fills `M`, so its 'end' is the point at which `M` is known to be complete.
stream.on('end', () => {
  writeJSON('home-images.json', M.toJS())
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This gets the node at a specific path. It's faster than the function above, but doesn't support wildcards in the path. */
var fs = require('fs') | |
var DepthStream = require('json-depth-stream') | |
var zlib = require('zlib') | |
/**
 * Stream the value found at `keyPath` inside a (possibly huge) JSON file into
 * another file, without loading the whole document into memory.
 *
 * @param {string} readPath - Path of the JSON file to read. If it ends in "gz"
 *   the content is gunzipped before being parsed.
 * @param {Array} keyPath - Path of the node to extract; handed to the parser's
 *   `query` and also used (via its length) as the parse depth.
 * @param {string} writePath - Path of the file the extracted value is written to.
 * @param {boolean} [gzipOutput=false] - When strictly `true`, the output is
 *   gzip-compressed before being written.
 * @returns {void} Work continues asynchronously after the function returns.
 */
function getBigJSONKey(readPath, keyPath, writePath, gzipOutput = false) {
  const readStream = fs.createReadStream(readPath)
  const writeStream = fs.createWriteStream(writePath)

  // Decide depth of parse based on the length of the key path provided.
  // This must be computed BEFORE the parser is constructed: reading a `const`
  // ahead of its declaration throws a ReferenceError.
  const depth = keyPath.length
  const json = new DepthStream(depth)

  // The stream that directly feeds the parser: a gunzip transform for
  // compressed input, otherwise the file stream itself. Created only when needed.
  const isGzippedInput = readPath.slice(-2) === "gz"
  const upstream = isGzippedInput ? zlib.createGunzip() : readStream

  // Construct the query portion of the pipe; it emits the chunk whose key matches `keyPath`.
  const q = json.query(keyPath)
  q.once('end', () => {
    // The wanted node has been fully emitted, so stop feeding the parser.
    // Unpipe whichever stream is actually connected to it (for gzipped input
    // that is the gunzip transform, not the raw file stream).
    upstream.unpipe(json)
  })

  // Decide write compression.
  if (gzipOutput === true) {
    q.pipe(zlib.createGzip()).pipe(writeStream)
  } else {
    q.pipe(writeStream)
  }

  // Start the read only after the output side is wired up.
  if (isGzippedInput) {
    readStream.pipe(upstream)
  }
  upstream.pipe(json)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment