Skip to content

Instantly share code, notes, and snippets.

@pulkitsinghal
Last active August 15, 2017 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pulkitsinghal/2f3806670439fa137210fc26b134237f to your computer and use it in GitHub Desktop.
Save pulkitsinghal/2f3806670439fa137210fc26b134237f to your computer and use it in GitHub Desktop.
Using bluebird to scroll through (in pages/chunks) a large dataset in mongodb and nodejs
/**
* Motivation:
* Wanted to put together some code that used:
* - BlueBird (promises)
* - MongoDB NodeJS Driver
* - and paging that did not rely on skip()
*
* References:
* Based on articles such as:
* https://scalegrid.io/blog/fast-paging-with-mongodb/
* and GitHub puclic code searches such as:
* https://github.com/search?utf8=%E2%9C%93&q=bluebird+MongoClient+_id+find+limit+gt+language%3Ajavascript+&type=Code&ref=searchresults
* which yielded smaple code hits such as:
* https://github.com/HabitRPG/habitrpg/blob/28f2e9c356d7053884107d90d04e28dde75fa81b/migrations/api_v3/coupons.js#L71
*/
var Promise = require('bluebird'); // jshint ignore:line
var _ = require('lodash');
var MongoClient = require('mongodb').MongoClient;
var dbHandleForShutDowns;
// option a: great for debugging
var logger = require('tracer').console();
// option b: general purpose use
//var logger = console;
//...
var getPage = function getPage(db, collectionName, query, projection, pageSize, processPage) {
//console.log('DEBUG', 'filter:', JSON.stringify(query,null,2));
projection = (projection) ? projection['_id']=true : {'_id':true};
return db
.collection(collectionName)
.find(query)
.project(projection)
.sort({'_id':1}).limit(pageSize)
.toArray() // cursor methods return promises: http://mongodb.github.io/node-mongodb-native/2.1/api/Cursor.html#toArray
.then(function processPagedResults(documents) {
if (!documents || documents.length < 1) {
// stop - no data left to traverse
return Promise.resolve();
}
else {
if (documents.length < pageSize) {
// stop - last page
return processPage(documents);
}
else {
return processPage(documents) // process the results of the current page
.then(function getNextPage(){ // then go get the next page
var last_id = documents[documents.length-1]['_id'];
query['_id'] = {'$gt' : last_id};
return getPage(db, collectionName, query, projection, pageSize, processPage);
});
}
}
});
};
//...
return MongoClient
.connect(params.dbUrl, {
promiseLibrary: Promise
})
.then(function(db) {
dbHandleForShutDowns = db;
return getPage(db, collectionName, {}, {}, 5, function processPage(pagedDocs){console.log('do something with', pagedDocs);})
.finally(db.close.bind(db));
})
.catch(function(err) {
console.error("ERROR", err);
dbHandleForShutDowns.close();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment