Skip to content

Instantly share code, notes, and snippets.

@brandonmp
Last active May 29, 2021 18:58
Show Gist options
  • Save brandonmp/fd56f1c27aaa5234f431bdd65fdfa96b to your computer and use it in GitHub Desktop.
Save brandonmp/fd56f1c27aaa5234f431bdd65fdfa96b to your computer and use it in GitHub Desktop.
When you have a JSON file too big for memory, here's a way to stream out the specific keys you want
/* This is a sort of filter: it collects every value whose path matches a provided pattern.
If you know the exact path of the node you want, use the function in the other file -- it'll be faster. */
var JSONStream = require('JSONStream')
var fs = require('fs')
// use immutable for easier assigning of deeply nested props
var Immutable = require('immutable')
var writeJSON = require('simple-json-writing-util.js')
// Accumulator for every matched value, stored under the path it was found at.
// Reassigned on each match because Immutable maps return a new map on update.
let M = Immutable.Map({})

// The callback receives every value whose path matches [<anything>, 'images'];
// `true` is a wildcard. (Check the JSONStream docs, but a regex should also work.)
// The callback returns nothing: it is used only to collect matches into `M`.
const stream = JSONStream.parse([true, 'images', { emitKey: true }], (value, fullPath) => {
  // fullPath = [resultsOfWildcard, 'images'],
  // which matches the immutable `setIn` syntax
  M = M.setIn(fullPath, value)
})

const readStream = fs.createReadStream('./filtered.json')
readStream.pipe(stream)

// Write the result when the *parser* is done, not when the file has been read:
// the parser is the stage that fills `M`, so its 'end' is the event that
// guarantees every match has been collected.
stream.on('end', () => {
  writeJSON('home-images.json', M.toJS())
})
/* this will get a node at a specific path. it's faster than above function, but doesn't support wildcards in path */
var fs = require('fs')
var DepthStream = require('json-depth-stream')
var zlib = require('zlib')
/**
 * Stream the node found at one specific key path of a (possibly huge) JSON
 * file into another file, without loading the whole document.
 *
 * @param {string} readPath - File to read. If the name ends in "gz" the
 *   content is gunzipped before it is parsed.
 * @param {Array} keyPath - Path of the node to extract; handed unchanged to
 *   the DepthStream `query` call. Its length is used as the parse depth.
 * @param {string} writePath - File the matched node is written to.
 * @param {boolean} [gzipOutput=false] - Gzip the output; only the strict
 *   value `true` enables compression.
 * @returns {fs.WriteStream} The output stream, so callers can wait for its
 *   'finish' event or attach an 'error' handler.
 */
function getBigJSONKey(readPath, keyPath, writePath, gzipOutput = false) {
  // decide depth of parse based on length of keypath provided.
  // This must be computed BEFORE the parser is constructed: reading a `const`
  // ahead of its declaration throws a ReferenceError.
  const depth = keyPath.length
  const json = new DepthStream(depth)

  const readStream = fs.createReadStream(readPath)
  const writeStream = fs.createWriteStream(writePath)

  // construct query portion of the pipe; it yields the chunk whose key matches `keyPath`
  const q = json.query(keyPath)
  q.once('end', () => {
    // the wanted node is complete, so stop feeding the parser
    readStream.unpipe(json)
  })

  // decide write compression (the gzip stream is only created when it is used)
  if (gzipOutput === true) {
    q.pipe(zlib.createGzip()).pipe(writeStream)
  } else {
    q.pipe(writeStream)
  }

  // start read, decompressing first when the input name ends in "gz"
  if (readPath.endsWith('gz')) {
    readStream.pipe(zlib.createGunzip()).pipe(json)
  } else {
    readStream.pipe(json)
  }

  return writeStream
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment