Skip to content

Instantly share code, notes, and snippets.

@belaz
Last active June 21, 2019 22:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save belaz/eb84e075e79c5b41fa537804e8b502db to your computer and use it in GitHub Desktop.
Save belaz/eb84e075e79c5b41fa537804e8b502db to your computer and use it in GitHub Desktop.
Search a huge JSON file (16 GB+) in no time

Read a huge JSON file (>10 GB)

Script to stream a JSON file and parse each datastore.

Installing

npm install

Quickstart

This sample searches for any datastore named "suggestions" (e.g. /ws/*/suggestions) that contains an entry with a "categoryOrderIndex" property.

node search-big-json-file.js
{
"description": "Search big json file",
"dependencies": {
"node-status": "^1.0.0",
"JSONStream": "^1.3.3"
}
}
const fs = require('fs');
const j = require('JSONStream');
const status = require('node-status');

// Streams a very large JSON file and counts every `ws.<id>.suggestions`
// node that holds at least one entry with its own `categoryOrderIndex`
// property, while rendering live progress with node-status.

// Log through node-status instead of the global console (shadowed on purpose).
const console = status.console();

// Counters rendered by the status line.
const found = status.addItem('found');
const scope = status.addItem('scope');

// Percentage of the file read so far. Registered once, before the status
// line starts, so the item exists when the pattern below is first rendered.
const chk = status.addItem('chk', {
  label: 'parsed chunk',
  max: 100,
  count: 0,
  precision: 0,
  // Must stay a regular function: it reads `this.count`.
  custom: function () {
    return `${this.count} %`;
  }
});

// Console template
status.start({
  pattern: 'Progress: {chk.green.bar} {chk.custom.magenta} | {uptime.green} {spinner.cyan} | Found: {found.magenta} | Objects: {scope} '
});

// File to stream
const file = 'sp.json';

/**
 * Reports a fatal error, stops the status line and marks the run as failed.
 *
 * @param {string} context - Short description of the step that failed.
 * @param {Error} err - The error that was raised.
 */
function fail(context, err) {
  console.log(`${context}: ${err.message}`);
  status.stop();
  process.exitCode = 1;
}

/**
 * Tells whether a streamed value has at least one entry that owns a
 * `categoryOrderIndex` property.
 *
 * @param {*} value - Value emitted by the parser for a matched path.
 * @returns {boolean} True when such an entry exists.
 */
function hasCategoryOrderIndex(value) {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  // Skip null entries (valid JSON) and do not rely on the entry's own
  // `hasOwnProperty`, which the data could shadow.
  return Object.values(value).some(
    (entry) => entry != null && Object.prototype.hasOwnProperty.call(entry, 'categoryOrderIndex')
  );
}

// The file size is needed up front to turn bytes read into a percentage.
fs.stat(file, (err, stat) => {
  if (err) {
    fail(`Cannot stat "${file}"`, err);
    return;
  }

  const total = stat.size;
  const read = fs.createReadStream(file);

  // Parse root: match every `ws.<any>.<any>` node and emit `{ path, value }`.
  const stream = j.parse(['ws', true, {
    recurse : false,
    emitPath: true,
    emitKey : true
  }]);

  // Iterate over matched nodes.
  stream.on('data', ({ path, value }) => {
    scope.inc(1);
    // Count a suggestions node at most once, however many entries match.
    if (path[2] === 'suggestions' && hasCategoryOrderIndex(value)) {
      found.inc(1);
    }
  });

  stream.on('end', () => {
    console.log('File ended');
    // Stop the renderer so the status timer does not outlive the work.
    status.stop();
  });

  stream.on('error', (parseErr) => {
    // Stop feeding a parser that has already failed.
    read.destroy();
    fail(`Cannot parse "${file}"`, parseErr);
  });

  read.on('error', (readErr) => {
    fail(`Cannot read "${file}"`, readErr);
  });

  // Progress: advance the existing item by the percentage delta only.
  let bytesRead = 0;
  let shownPercent = 0;
  read.on('data', (chunk) => {
    bytesRead += chunk.length;
    const percent = Math.round(100 * bytesRead / total);
    if (percent > shownPercent) {
      chk.inc(percent - shownPercent);
      shownPercent = percent;
    }
  });

  read.pipe(stream);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment