Skip to content

Instantly share code, notes, and snippets.

@toto-dev
Last active January 17, 2020 11:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save toto-dev/c44dd19bb5f2d2ca46c8aa75d779f974 to your computer and use it in GitHub Desktop.
Save toto-dev/c44dd19bb5f2d2ca46c8aa75d779f974 to your computer and use it in GitHub Desktop.
Partitioning session collection
/*
* This script is meant to be executed on a mongos instance of a MongoDB
* sharded cluster.
*
* Session records are small, so the sessions collection usually doesn't
* get split automatically by the auto-splitter and thus is not balanced
* automatically. This can cause a high load on the primary shard of the
* sessions collection.
*
* This script splits the `config.system.sessions` collection and
* distributes it among all the shards in the cluster. It creates one
* chunk per shard and moves one chunk to every shard in the cluster.
*/
// Number of hex chars in the first two (most-significant) groups of a UUID
// string, i.e. the leading "HHHHHHHH-HHHH" part: 8 + 4
var UUIDHeadChars = 12;
// Exclusive upper bound of the integers that can be represented using
// UUIDHeadChars hexadecimal digits. Each hex digit carries 4 bits, so this
// is 2^(4 * UUIDHeadChars), which equals 16^UUIDHeadChars.
var UUIDHeadMax = Math.pow(2, 4 * UUIDHeadChars);
/*
 * Generate a UUID that has @num encoded in the first 12 hex chars
 * (the two most-significant groups); all remaining digits are zero.
 *
 * @num must satisfy 0 <= num < UUIDHeadMax, otherwise the assertion fails.
 * Returns UUID('HHHHHHHH-HHHH-0000-0000-000000000000').
 */
function UUIDFromInt(num) {
  // A negative number would put a '-' sign into the hex string and a too
  // large one would not fit in UUIDHeadChars digits.
  assert(num >= 0 && num < UUIDHeadMax);
  // Format num as hexadecimal string with enough left-zero-padding
  // to reach UUIDHeadChars. Declared locally: the bare assignment used
  // before leaked `head` as an implicit global.
  var zeroPadded = '0'.repeat(UUIDHeadChars) + num.toString(16);
  var head = zeroPadded.slice(-UUIDHeadChars);
  assert.eq(head.length, UUIDHeadChars);
  // Assemble HHHHHHHH-HHHH-0000-0000-000000000000
  var groups = [
    head.slice(0, 8),
    head.slice(8),
    '0'.repeat(4),
    '0'.repeat(4),
    '0'.repeat(12)
  ];
  return UUID(groups.join('-'));
}
/*
 * Build the split points that cut the UUID space into @numChunks
 * chunks of (nearly) equal size.
 *
 * Returns an array with numChunks - 1 UUIDs in ascending order; the
 * array is empty when numChunks is 1 or less.
 */
function genUUIDSplitPoints(numChunks) {
  // Distance between two consecutive split points, rounded to an integer
  var step = Math.round(UUIDHeadMax / numChunks);
  var points = [];
  var head = 0;
  // One split point less than the number of chunks is needed
  var remaining = numChunks - 1;
  while (remaining > 0) {
    head += step;
    points.push(UUIDFromInt(head));
    remaining--;
  }
  return points;
}
/*
 * Render the chunks of namespace @ns as printable text.
 *
 * Reads the matching documents from confDB.chunks sorted by {ns, min} and
 * emits, per chunk, a line with its _id and shard followed by its min and
 * max bounds. Returns the empty string when no chunk matches.
 */
function chunkInfos(ns) {
  var entries = [];
  var cursor = confDB.chunks.find({ns: ns}).sort({ns: 1, min: 1});
  cursor.forEach(function(chunk) {
    var header = ' ' + chunk._id + ' ' + chunk.shard;
    var bounds = '\n\tmin: ' + tojson(chunk.min) +
        '\n\tmax: ' + tojson(chunk.max) + '\n';
    entries.push(header + bounds);
  });
  return entries.join('');
}
// Handle to the `config` database, whose collections/chunks/shards
// metadata is read below
var confDB = db.getSiblingDB('config');
var sessNS = 'config.system.sessions';

// Ensure session collection is sharded. The metadata document is looked up
// first so that a missing entry fails with the assertion message instead of
// a TypeError on `null.distributionMode`.
var sessCollInfo = confDB.collections.findOne({_id: sessNS});
assert(sessCollInfo, 'Sessions collection is not sharded');
assert.eq(
    'sharded', sessCollInfo.distributionMode,
    'Sessions collection is not sharded');

// Ensure no split have been done so far
assert.eq(
    1, confDB.chunks.count({ns: sessNS}),
    'Sessions collection has been already splitted. ' +
        'There are more then one chunks already');

// Keep the balancer away from the collection while chunks are split and
// moved by hand. The `finally` re-enables balancing even when a split or a
// move fails, so an aborted run does not leave balancing switched off.
sh.disableBalancing(sessNS);
try {
  var shards = confDB.shards.find().toArray();
  // One chunk per shard requires (number of shards - 1) split points
  var splitPoints = genUUIDSplitPoints(shards.length);
  assert.eq(splitPoints.length, shards.length - 1);

  // Split collection
  for (var i = 0; i < splitPoints.length; i++) {
    var splitPt = {_id: {id: splitPoints[i]}};
    assert.commandWorked(sh.splitAt(sessNS, splitPt));
  }

  // Distribute one chunk to every shard: the chunk containing split point i
  // is moved to shards[i + 1]; no chunk is explicitly moved to shards[0].
  for (var i = 0; i < splitPoints.length; i++) {
    var splitPt = {_id: {id: splitPoints[i]}};
    assert.commandWorked(sh.moveChunk(sessNS, splitPt, shards[i + 1]._id));
  }
} finally {
  sh.enableBalancing(sessNS);
}

// Report the resulting chunk layout (the header is built from sessNS so it
// always names the collection that was actually processed)
print('\n### Chunks info for \'' + sessNS + '\' collection\n');
print(chunkInfos(sessNS));
// Number of hex chars in the first two (most-significant) groups of a UUID
// string, i.e. the leading "HHHHHHHH-HHHH" part: 8 + 4
const UUIDHeadChars = 12;
// Exclusive upper bound of the integers that can be represented using
// UUIDHeadChars hexadecimal digits: hex "1" followed by UUIDHeadChars zeros,
// which equals 16^UUIDHeadChars.
const UUIDHeadMax = Number.parseInt('1' + '0'.repeat(UUIDHeadChars), 16);
/*
 * Generate a UUID that has @num encoded in the first 12 hex chars
 * (the two most-significant groups); all remaining digits are zero.
 *
 * @num must satisfy 0 <= num < UUIDHeadMax, otherwise the assertion fails.
 * Returns UUID('HHHHHHHH-HHHH-0000-0000-000000000000').
 */
function UUIDFromInt(num) {
  // A negative number would put a '-' sign into the hex string and a too
  // large one would not fit in UUIDHeadChars digits.
  assert(num >= 0 && num < UUIDHeadMax);
  // Format num as hexadecimal string with enough left-zero-padding
  // to reach UUIDHeadChars. Declared with `const`: the bare assignment used
  // before leaked `head` as an implicit global.
  const zeroPadded = '0'.repeat(UUIDHeadChars) + num.toString(16);
  const head = zeroPadded.slice(-UUIDHeadChars);
  assert.eq(head.length, UUIDHeadChars);
  // Assemble HHHHHHHH-HHHH-0000-0000-000000000000
  const tail = '0'.repeat(4) + '-' + '0'.repeat(4) + '-' + '0'.repeat(12);
  return UUID(head.slice(0, 8) + '-' + head.slice(8) + '-' + tail);
}
/*
 * Build the split points that cut the UUID space into @numChunks
 * chunks of (nearly) equal size.
 *
 * Returns an array with numChunks - 1 UUIDs in ascending order; the
 * array is empty when numChunks is 1 or less.
 */
function genUUIDSplitPoints(numChunks) {
  // Distance between two consecutive split points, rounded to an integer
  const stride = Math.round(UUIDHeadMax / numChunks);
  const result = [];
  let cursor = 0;
  // Boundaries are numbered 1 .. numChunks - 1
  for (let boundary = 1; boundary < numChunks; boundary += 1) {
    cursor += stride;
    result.push(UUIDFromInt(cursor));
  }
  return result;
}
/*
 * Get enriched chunk statistics for namespace @ns through the mongos
 * connection `st.s`.
 *
 * Returns the config.chunks documents of @ns, each extended with a
 * `numDocs` field holding the number of documents whose _id falls in the
 * chunk's range.
 */
function chunkStats(ns) {
  var chunks = st.s.getCollection('config.chunks').find({'ns': ns}).toArray();
  // The target collection is the same for every chunk: look it up once
  var coll = st.s.getCollection(ns);
  chunks.forEach(function(chunk) {
    // Use $gte on the lower bound, treating the range as [min, max): with a
    // strict $gt, a document sitting exactly on a split point would be
    // counted in no chunk at all.
    chunk['numDocs'] =
        coll.find({_id: {$gte: chunk.min._id, $lt: chunk.max._id}}).count();
  });
  return chunks;
}
/*
 * Build a BinData value of subtype 0 from a short random string: chars 2..5
 * of the base-36 rendering of Math.random(), i.e. the first digits after
 * the leading "0.".
 */
function genRndBinData() {
  const base36 = Math.random().toString(36);
  const payload = base36.slice(2, 6);
  return BinData(0, payload);
}
/*
 * Build a random compound key value: `_id` is a fresh UUID() and `uid` is
 * the result of genRndBinData().
 */
function genRndSK() {
  const key = {};
  key._id = UUID();
  key.uid = genRndBinData();
  return key;
}
// ShardingTest fixture used by every helper below through `st.s`; it is
// configured with one shard, one config server and one node (option
// meanings as defined by the ShardingTest helper - TODO confirm)
var st = new ShardingTest({shards: 1, config: 1, nodes: 1});
// Database and collection that the test shards and fills
const dbName = 'testDB';
const collName = 'cll';
// Full namespace of the test collection: "<database>.<collection>"
const ns = `${dbName}.${collName}`;
// Name of the shard key field
const sk = '_id';
/*
 * Insert @num documents with random keys (see genRndSK) into the test
 * collection `ns`, using a single unordered bulk operation.
 *
 * Does nothing when @num is not a positive number: a bulk operation
 * without any queued write cannot be executed, so an empty request is
 * treated as a no-op instead of an error.
 */
function insertRndDocs(num) {
  if (!(num > 0)) {
    return;
  }
  var bulk = st.s.getCollection(ns).initializeUnorderedBulkOp();
  for (var i = 0; i < num; i++) {
    bulk.insert({_id: genRndSK()});
  }
  bulk.execute();
}
/*
 * Print how many documents each chunk of namespace @ns holds: a header
 * line followed by one "<chunk position> > <numDocs>" line per chunk, in
 * the order returned by chunkStats.
 */
function printDocsCount(ns) {
  print('Docs distribution on chunks:');
  const stats = chunkStats(ns);
  for (let pos = 0; pos < stats.length; pos += 1) {
    print(' ' + pos + ' > ' + stats[pos].numDocs);
  }
}
// Enable sharding on the test database and shard the collection on _id
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
assert.commandWorked(st.s.adminCommand({shardcollection: ns, key: {_id: 1}}));
// Pre-split the collection at 29 evenly spaced UUID split points (30 chunks);
// each split point is wrapped in a sub-document, matching the shape of the
// keys produced by genRndSK
genUUIDSplitPoints(30).forEach((splitPoint) => {
  st.splitAt(ns, {_id: {_id: splitPoint}});
});
// Insert 30 random documents and report how they spread over the chunks
insertRndDocs(30);
printDocsCount(ns);
// Handle to the test collection, kept as a script-level variable
var co = st.s.getCollection(ns);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment