Skip to content

Instantly share code, notes, and snippets.

@mjhm
Created January 22, 2013 04:18
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mjhm/4592006 to your computer and use it in GitHub Desktop.
Save mjhm/4592006 to your computer and use it in GitHub Desktop.
This is a performance test of several methods of updating every item in a 100,000-row MongoDB collection. See the comment below for the results of the test on a small EC2 instance.
#! /usr/bin/node
var MongoClient = require('mongodb').MongoClient
var Server = require('mongodb').Server;
var async = require('async');
var util = require('util');
// A simple deterministic linear congruential pseudo-random generator,
// anticipating a comparable test with the aggregation pipeline.
var randMod = 2 << 24;      // modulus (2^25)
var randMult = 1140671485;  // multiplier
var randAdd = 12820163;     // increment
// Return the next value in the LCG sequence for the given seed.
function myRand(seed) {
  var next = seed * randMult + randAdd;
  return next % randMod;
}
// Pre-build the seed documents: ndoc docs, each carrying a
// deterministic pseudo-random value derived from its index.
var ndoc = 100000;
var i = ndoc;
var docList = [];
do {
  i -= 1;
  docList[i] = {randomVal: myRand(i)};
} while (i > 0);
// Shared connection state, populated by the first async.series task.
// NOTE(review): the MongoClient(new Server(...)) + open() form is the
// 1.x driver API -- confirm the installed driver version before reuse.
var mongoClient = new MongoClient(new Server('localhost', 27017));
var db = null;           // bound to the 'test' database after open
var coll = null;         // benchmark collection (update_benchmark)
var anotherColl = null;  // scratch collection for scenario 5
var finalReport = '';    // accumulated per-scenario timing lines
// Running all the tests with explicit async.series tasks.
async.series([
function (dbOpenDone) {
mongoClient.open( function(err, mongoClient) {
if (err) {
dbOpenDone(err);
return;
}
db = mongoClient.db('test');
coll = db.collection('update_benchmark');
anotherColl = db.collection('update_benchmark_tmp');
// db.admin().setProfilingLevel('all', function (plErr) {
// dbOpenDone(plErr);
// });
dbOpenDone(err);
});
},
function (dropDone) {
// dropDone(null); return; // uncomment to skip drop
if (coll) {
coll.drop(function (err) {
// Ignore errors from drop.
dropDone(null);
});
} else {
dropDone(null);
}
},
function (dropDone) {
// dropDone(null); return; // uncomment to skip drop
if (anotherColl) {
anotherColl.drop(function (err) {
// Ignore errors from drop.
dropDone(null);
});
} else {
dropDone(null);
}
},
function (insertDone) {
// insertDone(null); return; // uncomment to skip insert
console.log('Inserting docList');
var iChunk = 0;
var chunkSize = 10000;
async.until(function () {return (iChunk >= docList.length);},
function (insertChunkDone) {
coll.insert(docList.slice(iChunk, iChunk + chunkSize), function (insErr) {
iChunk += chunkSize;
process.nextTick(function () {
console.log('Finished Insert ' + iChunk);
insertChunkDone(insErr);
});
});
},
insertDone
);
},
// Test Scenario 1: Pull all values onto the client and push them back with individual saves.
function (testScenario1Done) {
// testScenario1Done(null); return; // uncomment to skip testScenario 1
var description = 'Big find().toArray, and individual saves.';
console.log('Starting Test Scenario 1');
var testStart = new Date().getTime();
coll.find().toArray(function (err, itemList) {
if (err) {
testScenario1Done(err);
}
console.log('Retrieved all items in: ' + ((new Date().getTime() - testStart)/1000) + ' seconds');
itemList.forEach(function (item) {
item.randomVal = myRand(item.randomVal);
});
var i = 0;
async.forEach(itemList, function (item, itemSaveDone) {
var checkpointNote = null;
i += 1;
if (i % 10000 === 0) {
console.log('(1) Saving ' + i);
checkpointNote = '(1) Finished saving ' + i;
}
coll.save(item, function (err) {
if (checkpointNote) {
console.log(checkpointNote);
}
itemSaveDone(err);
});
},
function (err) {
var elapsed = (new Date().getTime() - testStart)/1000;
var rpt = 'testScenario1: ' + elapsed + ' seconds';
console.log(rpt);
finalReport += rpt + ' (' + description + ')\n';
testScenario1Done(err);
});
});
},
// Test Scenario 2: Pull all values onto the client and push them back with individual updates.
function (testScenario2Done) {
// testScenario2Done(null); return; // uncomment to skip testScenario 2
var description = 'Big find().toArray, and individual updates.';
console.log('Starting Test Scenario 2');
var testStart = new Date().getTime();
coll.find().toArray(function (err, itemList) {
if (err) {
testScenario1Done(err);
}
console.log('Retrieved all items in:' + ((new Date().getTime() - testStart)/1000) + ' seconds');
var i = 0;
async.forEach(itemList, function (item, itemUpdateDone) {
var checkpointNote = null;
i += 1;
if (i % 10000 === 0) {
console.log('(2) Updating ' + i);
checkpointNote = '(2) Finished updating ' + i;
}
coll.update(
{_id: item._id},
{$set: {randomVal: myRand(item.randomVal)}},
function (err) {
if (checkpointNote) {
console.log(checkpointNote);
}
itemUpdateDone(err);
}
);
},
function (err) {
var testScenario2Elapsed = (new Date().getTime() - testStart)/1000;
var rpt = 'testScenario2: ' + testScenario2Elapsed + ' seconds';
console.log(rpt);
finalReport += rpt + ' (' + description + ')\n';
testScenario2Done(err);
});
});
},
// Test Scenario 3: Iterate on cursor (small batch) and use update.
function (testScenario3Done) {
// testScenario3Done(null); return; // uncomment to skip testScenario 3
var description = 'Iterate on cursor (size = 10), then update.';
console.log('Starting Test Scenario 3');
var testStart = new Date().getTime();
var curs = coll.find().batchSize(10);
curs.count(function(err, count) {
var i = 0;
curs.each(function (err, item) {
var checkpointNote = null;
if (err) {
testScenario3Done(err);
}
if (!item) {
return;
}
i += 1;
if (i % 10000 === 0) {
checkpointNote = '(3) Finished updating ' + i;
console.log('(3) Updating ' + i);
}
coll.update(
{_id: item._id},
{$set: {randomVal: myRand(item.randomVal)}},
function (err) {
if (checkpointNote) {
console.log(checkpointNote);
}
if (err) {
testScenario3Done(err);
}
count -= 1;
if (count === 0) {
var elapsed = (new Date().getTime() - testStart)/1000;
var rpt = 'testScenario3: ' + elapsed + ' seconds';
console.log(rpt);
finalReport += rpt + ' (' + description + ')\n';
testScenario3Done(null);
}
}
);
});
});
},
// Test Scenario 4: Iterate on cursor (large batch) and use update.
function (testScenario4Done) {
// testScenario4Done(null); return; // uncomment to skip testScenario 4
var description = 'Iterate on cursor (size = 10000), then update.';
console.log('Starting Test Scenario 4');
var testStart = new Date().getTime();
var curs = coll.find().batchSize(10000);
curs.count(function(err, count) {
var i = 0;
curs.each(function (err, item) {
var checkpointNote = null;
if (err) {
testScenario4Done(err);
}
if (!item) {
return;
}
i += 1;
if (i % 10000 === 0) {
checkpointNote = '(4) Finished updating ' + i;
console.log('(4) Updating ' + i);
}
coll.update(
{_id: item._id},
{$set: {randomVal: myRand(item.randomVal)}},
function (err) {
if (checkpointNote) {
console.log(checkpointNote);
}
if (err) {
testScenario4Done(err);
}
count -= 1;
if (count === 0) {
var elapsed = (new Date().getTime() - testStart)/1000;
var rpt = 'testScenario4: ' + elapsed + ' seconds';
console.log(rpt);
finalReport += rpt + ' (' + description + ')\n';
testScenario4Done(null);
}
}
);
});
});
},
// Test Scenario 5: Iterate on cursor (large batch) and fill a brand new collection.
function (testScenario5Done) {
// testScenario5Done(null); return; // uncomment to skip testScenario 5
var description = 'Iterate on cursor (size = 10000), then update.';
console.log('Starting Test Scenario 5');
var testStart = new Date().getTime();
var curs = coll.find().batchSize(10000);
curs.count(function(err, count) {
var i = 0;
var newDocList = [];
curs.each(function (err, item) {
var checkpointNote = null;
if (err) {
testScenario5Done(err);
}
if (!item) {
return;
}
item.randomVal = myRand(item.randomVal);
newDocList[i%10000] = item;
i += 1;
if (i % 10000 === 0 || i === count) {
checkpointNote = '(5) Finished updating ' + i;
console.log('(5) Updating ' + i);
anotherColl.insert(newDocList, function (err) {
if (checkpointNote) {
console.log(checkpointNote);
}
if (err) {
testScenario5Done(err);
}
if (i === count) {
var elapsed = (new Date().getTime() - testStart)/1000;
var rpt = 'testScenario5: ' + elapsed + ' seconds';
console.log(rpt);
finalReport += rpt + ' (' + description + ')\n';
testScenario5Done(err);
}
});
}
});
});
}
],
function (err) {
console.log('\n\nFinal Report:\n' + finalReport);
mongoClient.close();
if (err) {
console.log(err);
process.exit(1);
}
process.exit(0);
});
@mjhm
Copy link
Author

mjhm commented Jan 22, 2013

Here are the results running on a small EC2 instance with the MongoDB at localhost.

  • testScenario1: 108.661 seconds -- Uses find().toArray to pull in all the items at once then replaces the documents with individual "save" calls.
  • testScenario2: 99.645 seconds -- Uses find().toArray to pull in all the items at once then updates the documents with individual "update" calls.
  • testScenario3: 74.553 seconds -- Iterates on the cursor (find().each) with batchSize = 10, then uses individual update calls.
  • testScenario4: 58.673 seconds -- Iterates on the cursor (find().each) with batchSize = 10000, then uses individual update calls.
  • testScenario5: 4.727 seconds -- Iterates on the cursor with batchSize = 10000, and does inserts into a new collection, 10,000 items at a time.

Though not included I also did a test with MapReduce used as a server side filter which ran at about 19 seconds. I would have liked to have similarly used "aggregate" as a server side filter, but it doesn't yet have an option to output to a collection.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment