Skip to content

Instantly share code, notes, and snippets.

@michelem09
Last active August 8, 2022 07:49
Show Gist options
  • Save michelem09/170a07d877e2787f7a53 to your computer and use it in GitHub Desktop.
Save michelem09/170a07d877e2787f7a53 to your computer and use it in GitHub Desktop.
Gist to update items in series, batch by batch, in a huge MongoDB collection (millions of records) with Node.js and the Async module
'use strict';
/**
* Module dependencies.
*/
var async = require('async'),
mongoose = require('mongoose'),
ObjectId = mongoose.Types.ObjectId;
// Connection string of the database that holds the collection to update.
var dbUri = 'mongodb://localhost:27017/test';

/**
 * Connects to MongoDB and rewrites two fields of every document in the
 * "Hugecollection" model, one page of documents at a time.
 *
 * Flow:
 *   1. count the documents to work out how many pages there are;
 *   2. walk the pages strictly in series (async.eachSeries);
 *   3. inside a page, update all matching documents in parallel (async.each)
 *      and only move on to the next page once every update has called back;
 *   4. log the outcome and disconnect.
 *
 * Any query or update error aborts the run and is reported in the final
 * callback instead of being silently dropped.
 */
var db = mongoose.connect(dbUri, {}, function (err) {
  if (err) {
    console.error('\x1b[31m', 'Could not connect to MongoDB!');
    console.log(err);
    return;
  }

  console.log('Connected to MongoDB: ' + dbUri);

  // This is the huge collection whose fields have to be updated.
  // NOTE(review): mongoose.model(name) without a schema only works when the
  // model has already been registered elsewhere - confirm before running.
  var Hugecollection = mongoose.model('Hugecollection');

  // Placeholder transformation: replace the body with the real change.
  var doYourChange = function (value) {
    return value;
  };

  // First of all count the total items.
  Hugecollection.count({}, function (err, tot) {
    if (err) {
      console.log(err);
      // Nothing else will run, so release the connection and let the process exit.
      mongoose.disconnect();
      return;
    }

    var limit = 10000,
      // Round up so a final, partially filled page is not skipped.
      pages = Math.ceil(tot / limit),
      pageNumbers = [],
      page;

    console.log('Total items: ' + tot + ' - Total pages: ' + pages);

    // Create an array of the (zero-based) page numbers to be queried.
    for (page = 0; page < pages; page++) {
      pageNumbers.push(page);
    }

    // Give the page numbers to eachSeries: it asks for one page of `limit`
    // items at a time, which is better than querying all items at once when
    // the collection is very large.
    async.eachSeries(pageNumbers, function (key, done) {
      // Find each batch of items; sorting by _id keeps the paging stable.
      Hugecollection.find({}).select({
        _id: 1,
        fieldtobechanged1: 1,
        fieldtobechanged2: 1
      }).limit(limit).skip(key * limit).sort({
        _id: 1
      }).exec(function (err, items) {
        // Propagate the error so the series stops instead of hanging forever.
        if (err) return done(err);

        // The collection may have shrunk since it was counted.
        if (!items || items.length === 0) return done();

        console.log("Doing: " + (key + 1) + "/" + pages + " - TEST THIS: " + items[0]._id);

        // Asynchronously loop over each item in the batch and apply the changes.
        async.each(items, function (item, callback) {
          // Only documents that have both fields set are rewritten.
          if (!item.fieldtobechanged1 || !item.fieldtobechanged2) {
            return callback();
          }

          var fieldtobechanged1 = doYourChange(item.fieldtobechanged1),
            fieldtobechanged2 = doYourChange(item.fieldtobechanged2);

          // Now update the item with the changes.
          Hugecollection.findOneAndUpdate({
            _id: new ObjectId(item._id)
          }, {
            $set: {
              fieldtobechanged1: fieldtobechanged1,
              fieldtobechanged2: fieldtobechanged2
            }
          }, function (err) {
            callback(err);
          });
        }, done); // pass `done` itself: it is called once all items are updated
      });
    }, function (err) {
      if (err) {
        console.error('\x1b[31m', 'Update aborted because of an error:');
        console.log(err);
      } else {
        console.log('Done!');
      }
      mongoose.disconnect();
    });
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment