profiling memory usage issues with parsing a big JSON database
> node --expose-gc memtest.js

On my 64-bit Mac, this process uses about 391MB of memory according to Activity Monitor.
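(For a reading from inside the process itself, rather than Activity Monitor, a quick sketch using Node's built-in process.memoryUsage() also works; logMemory here is just a hypothetical helper, not part of the gist:)

// hypothetical helper: report the process's memory usage from inside node
function logMemory(label) {
	var mu = process.memoryUsage();
	console.log(label,
		"rss: " + Math.round(mu.rss / 1024 / 1024) + "MB,",
		"heapUsed: " + Math.round(mu.heapUsed / 1024 / 1024) + "MB");
}

logMemory("after generating");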
> node --expose-gc memtest2.js

By contrast, if I read in 3.1 million records nearly identical to those being generated, from a .json
file, this process quickly fills up 1.5GB of memory, then crawls to a halt, and takes a REALLY long
time (like hours) to finally pull in all the records.

What gives?
memtest.js:

// simulates reading in 3.1mm JSON objects from file, each with format:
// { Name: "..", Latitude: 323.23423, Longitude: 2342.32234, Population: 443 }
// ...by creating them randomly
console.log("generating...");
// exposed via `--expose-gc` node flag
// http://simonmcmanus.wordpress.com/2013/01/03/forcing-garbage-collection-with-node-js-and-v8/
global.gc();
var recs = [], obj;

for (var i = 0; i < 3100000; i++) {
	obj = {
		// random base-36 string (skipping the leading "0.")
		Name: Math.random().toString(36).substring(2),
		Latitude: Math.random() * 50,
		Longitude: Math.random() * 50
	};

	// only add this field every once in awhile
	// (but predictably, not randomly)
	if (i % 3976 === 0) obj.Population = Math.round(Math.random() * 1E9);

	recs.push(obj);

	// print an intermittent status
	// (to spot memory pauses, if any)
	if (recs.length % 10000 === 0) console.log(recs.length);
}
console.log("done.");
// exposed via `--expose-gc` node flag
// http://simonmcmanus.wordpress.com/2013/01/03/forcing-garbage-collection-with-node-js-and-v8/
global.gc();
// keep the process alive long enough to
// inspect its memory usage
setTimeout(function(){
	console.log("bye");
}, 1000 * 60 * 5);
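(Note: memtest2.js below reads from a myreallybig.json that the gist doesn't show being produced. As a rough sketch, assuming the same record shape serialized as one big JSON array, something like this hypothetical generator would produce a comparable file:)

// make-big-json.js -- hypothetical generator, not part of the original gist
var fs = require("fs");
var path = require("path");

var out = fs.createWriteStream(path.join(__dirname, "myreallybig.json"));
out.write("[");
for (var i = 0; i < 3100000; i++) {
	var obj = {
		Name: Math.random().toString(36).substring(2),
		Latitude: Math.random() * 50,
		Longitude: Math.random() * 50
	};
	if (i % 3976 === 0) obj.Population = Math.round(Math.random() * 1E9);
	// (ignores write() backpressure; acceptable for a one-off script)
	out.write((i > 0 ? "," : "") + JSON.stringify(obj));
}
out.write("]");
out.end();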
console.log("reading...");
var recs = [];
var fs = require("fs");
var path = require("path");
// From:
// https://gist.github.com/creationix/5992451
var jsonMachine = require(path.join(__dirname,"jsonMachine.js"));
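// (as used below: jsonMachine(callback) returns a state function; each call
// state(char) consumes one character of input and returns the next state)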
var filepath = path.join(__dirname,"myreallybig.json");
var read_stream = fs.createReadStream(filepath);
var i, l, state;
// exposed via `--expose-gc` node flag
// http://simonmcmanus.wordpress.com/2013/01/03/forcing-garbage-collection-with-node-js-and-v8/
global.gc();
state = jsonMachine(function __on_record__(rec){
	if (rec) {
		recs.push(rec);
		if (recs.length % 10000 === 0) {
			console.log(recs.length);
		}
	}
});
read_stream.on("data",function __data__(chunk){
for (i=0, l=chunk.length; i<l; i++) {
state = state(chunk[i]);
}
});
read_stream.on("end",function __end__(){
read_stream.removeAllListeners("data");
read_stream.removeAllListeners("end");
read_stream = null;
state.apply(null,arguments);
state = null;
console.log("done.");
// exposed via `--expose-gc` node flag
// http://simonmcmanus.wordpress.com/2013/01/03/forcing-garbage-collection-with-node-js-and-v8/
global.gc();
// hold onto the program for long enough to
// inspect the process memory
setTimeout(function(){
console.log("bye");
},1000*60*5);
});
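(As a hypothetical point of comparison, not part of the gist: if the same records were stored as newline-delimited JSON, one record per line, the per-character state machine could be skipped entirely in favor of Node's built-in readline module plus one JSON.parse per line. The myreallybig.ndjson filename here is an assumption:)

// compare-ndjson.js -- hypothetical alternative, assumes a line-delimited file
var fs = require("fs");
var path = require("path");
var readline = require("readline");

var recs = [];
var rl = readline.createInterface({
	input: fs.createReadStream(path.join(__dirname, "myreallybig.ndjson"))
});
rl.on("line", function(line){
	recs.push(JSON.parse(line));
	if (recs.length % 10000 === 0) console.log(recs.length);
});
rl.on("close", function(){
	console.log("done:", recs.length);
});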