Last active
May 16, 2017 13:08
-
-
Save tlbdk/68d1edb2ec37c9fc59e2d0b8c72f1c0f to your computer and use it in GitHub Desktop.
Parse mongoexport dumps in a memory-efficient way
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs'); | |
// Dump your collection with: mongoexport --collection collectionname --db dbname --out output.json --query '{ "myprop": "test" }'

// Record framing this parser relies on: each record opens with its "_id"
// field and is closed by "}" immediately followed by a newline.
const RECORD_START = '{"_id"';
const RECORD_END = '}\n';

/**
 * Parses every complete record found in `text` and returns what is left over.
 *
 * The end marker is searched for starting at the record's own start marker,
 * so a stray "}\n" sitting before the first record cannot stall the parser.
 * Text that precedes a record start is discarded.
 *
 * @param {string} text - Buffered input: the previous leftover plus the new chunk.
 * @param {boolean} [flush=false] - Pass `true` once the input is exhausted; an
 *   unterminated trailing record (no final newline) is then parsed as well.
 * @returns {{records: Array<*>, rest: string}} The parsed records in input
 *   order, and the unconsumed tail to prepend to the next chunk.
 * @throws {SyntaxError} If a framed record is not valid JSON (this includes a
 *   truncated trailing record when `flush` is true).
 */
function extractRecords(text, flush = false) {
  const records = [];
  let cursor = 0;

  for (;;) {
    const start = text.indexOf(RECORD_START, cursor);
    if (start === -1) {
      // No record begins in the unread part. Keep only a tail short enough
      // to be the first characters of a start marker split across two chunks.
      const keepFrom = Math.max(cursor, text.length - (RECORD_START.length - 1));
      return { records, rest: flush ? '' : text.slice(keepFrom) };
    }

    const end = text.indexOf(RECORD_END, start);
    if (end === -1) {
      if (flush) {
        // End of input: the remainder is the last record, written without a
        // trailing newline.
        records.push(JSON.parse(text.slice(start)));
        return { records, rest: '' };
      }
      // The record is still incomplete; wait for more data.
      return { records, rest: text.slice(start) };
    }

    records.push(JSON.parse(text.slice(start, end + 1)));
    cursor = end + RECORD_END.length;
  }
}

// The stream decodes to UTF-8 strings, so chunks can be concatenated directly.
// No 'error' listener is attached on purpose: a read failure then surfaces as
// an uncaught exception instead of being silently ignored.
const stream = fs.createReadStream('output.json', { flags: 'r', encoding: 'utf-8' });
let buf = '';
let count = 0;

/**
 * Consumes a batch of parsed records, logging a running zero-based index
 * for each one.
 *
 * @param {Array<*>} records - Records returned by `extractRecords`.
 */
function handleRecords(records) {
  for (const obj of records) {
    console.log(`${count++}`);
    // Do something with obj
  }
}

stream.on('data', (chunk) => {
  const { records, rest } = extractRecords(buf + chunk);
  buf = rest;
  handleRecords(records);
});

stream.on('end', () => {
  // Flush whatever is still buffered so a final record without a trailing
  // newline is not lost.
  const { records } = extractRecords(buf, true);
  buf = '';
  handleRecords(records);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment