Skip to content

Instantly share code, notes, and snippets.

@alexbevi
Last active February 24, 2020 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexbevi/dc51c0ce4820f46e46feca06dbdc64bb to your computer and use it in GitHub Desktop.
Save alexbevi/dc51c0ce4820f46e46feca06dbdc64bb to your computer and use it in GitHub Desktop.
Variation of the collStats command that can be run in a sharded environment through the Mongo shell and assembles the result in JavaScript to bypass BSON size limitations

As a result of SERVER-44891, which was closed as Won't Fix, I created this workaround. This script can be verified as follows using mtools, m, and mgeneratejs:

First, initialize the cluster:

mkdir shardedCluster
m 4.0.9
mlaunch init --replicaset --nodes 1 --sharded 60 --csrs --binarypath $(m bin 4.0.9) --dir shardedCluster --auth

Next, log into the cluster (mongo --norc -u user -p password --authenticationDatabase admin) and run the following:

sh.enableSharding("test")
sh.shardCollection("test.test1", { statusID: 1 } )
db.getSiblingDB("config").settings.save( { _id:"chunksize", value: 1 } )

Exit the mongo shell, and from bash using mgeneratejs seed the cluster:

mgeneratejs -n 500000 '{statusID: {"$numberLong": {"min": 100, "max": 100000}},metadata: {blacklistType: {"$numberLong": {"min": 1020, "max": 1100}}},groupID: {"$numberLong": {"min": 1600, "max": 5000}},departmentID: {"$numberLong": {"min": 1600, "max": 5000}},assignedToID: {"$choose": {"from": ["", "123", "456"], "weights": [1, 2, 2]}},allowedUsers: {"$choose": {"from": ["", "345", "678"], "weights": [1, 2, 2]}},typeID: {"$numberLong": {"min": 100750, "max": 100760}},isFileProhipted: "$bool",fileWriter: {"$choose": {"from": ["", "xxx", "yyy"], "weights": [1, 2, 2]}},assignedByID: {"$numberLong": {"min": 200750, "max": 200760}}}' | mongoimport -u user -p password -c test1

Once seeded, connect to the cluster again and create a large number of indexes:

db.test1.createIndexes([
{ "statusID":1 },
{ "metadata":1 },
{ "groupID":1 },
{ "departmentID":1 },
{ "assignedToID":1 },
{ "allowedUsers":1 },
{ "typeID":1 },
{ "isFileProhipted":1 },
{ "fileWriter":1 },
{ "assignedByID":1 },
{ "statusID":1,"metadata":1 },
{ "groupID":1,"metadata":1 },
{ "departmentID":1,"metadata":1 },
{ "assignedToID":1,"metadata":1 },
{ "allowedUsers":1,"metadata":1 },
{ "typeID":1,"metadata":1 },
{ "isFileProhipted":1,"metadata":1 },
{ "fileWriter":1,"metadata":1 },
{ "assignedByID":1,"metadata":1 },
{ "statusID":1,"groupID":1 },
{ "metadata":1,"groupID":1 },
{ "groupID":1,"groupID":1 },
{ "departmentID":1,"groupID":1 },
{ "assignedToID":1,"groupID":1 },
{ "allowedUsers":1,"groupID":1 },
{ "statusID":1,"departmentID":1 },
{ "metadata":1,"departmentID":1 },
{ "groupID":1,"departmentID":1 },
{ "departmentID":1,"departmentID":1 },
{ "assignedToID":1,"departmentID":1 },
{ "allowedUsers":1,"departmentID":1 },
{ "typeID":1,"departmentID":1 },
{ "isFileProhipted":1,"departmentID":1 },
{ "fileWriter":1,"departmentID":1 },
{ "assignedByID":1,"departmentID":1 },
{ "statusID":1,"assignedToID":1 },
{ "metadata":1,"assignedToID":1 },
{ "groupID":1,"assignedToID":1 },
{ "departmentID":1,"assignedToID":1 },
{ "assignedToID":1,"assignedToID":1 },
{ "allowedUsers":1,"assignedToID":1 },
{ "typeID":1,"assignedToID":1 },
{ "isFileProhipted":1,"assignedToID":1 },
{ "fileWriter":1,"assignedToID":1 },
{ "assignedByID":1,"assignedToID":1 }
])

Download the shardedCollStats.js script and save it.

To verify the behavior of this method, from the mongo shell try the following:

// this should fail now
db.test1.stats({ indexDetails: true, scale: 1024 });
load('shardedCollStats.js');
// this should succeed
shardedCollStats("test","test1","user","password");
/**
 * Provides an alternative implementation of the `db.collection.stats()` method for sharded
 * clusters where the total BSON document size of the stats may exceed the BSON document size limit
 * of 16MB, such as the following scenario:
 *
 * mongos> db.runCommand({ collStats : "collection", scale: 1024 })
 * {
 *   "ok" : 0,
 *   "errmsg" : "BSONObj size: 17465286 (0x10A7FC6) is invalid. Size must be between 0 and 16793600(16MB) First element: sharded: true",
 *   "code" : 10334,
 *   "codeName" : "Location10334"
 * }
 *
 * Each shard listed in `config.shards` is contacted directly, `$collStats` (storageStats,
 * scale 1024) is run against it, and the per-shard results are merged in the shell so that no
 * single BSON document has to carry the whole answer.
 *
 * Usage:
 *   shardedCollStats(database, collection [,username, password])
 *
 * @param database   The database name the sharded collection belongs to
 * @param collection The sharded collection
 * @param user       (optional) username for authentication
 * @param password   (optional) password for authentication
 * @returns A document shaped like the sharded `collStats` output: summed `count`, `size`,
 *          `storageSize`, `totalIndexSize` and `indexSizes`; a document-count-weighted
 *          `avgObjSize`; `nindexes`; `nchunks`; and the raw per-shard stats under `shards`.
 *          Shards that fail authentication or return no stats are reported via `print` and
 *          left out of the result.
 */
var shardedCollStats = function(database, collection, user, password) {
  var namespace = database + "." + collection;
  var collStats = {
    "sharded": true,
    "capped": db.getSiblingDB(database).getCollection(collection).isCapped(),
    "ns": namespace,
    "count": 0,
    "size": 0,
    "storageSize": 0,
    "totalIndexSize": 0,
    "indexSizes": {},
    "avgObjSize": 0,
    "nindexes": 0,
    "nchunks": 0,
    "shards": {},
    "ok": 1
  };

  // Running sum of (avgObjSize * count) over all shards. Dividing it by the total document
  // count yields a correctly weighted cluster-wide average object size.
  var weightedObjSizeTotal = 0;

  // Adds every entry of indexSizesDoc into record, summing sizes of indexes already present.
  var appendIndexSizes = function(record, indexSizesDoc) {
    var keys = Object.keys(indexSizesDoc || {});
    for (var i = 0; i < keys.length; i++) {
      var idx = keys[i];
      if (Object.prototype.hasOwnProperty.call(record, idx)) {
        record[idx] += indexSizesDoc[idx];
      } else {
        record[idx] = indexSizesDoc[idx];
      }
    }
    return record;
  };

  db.getSiblingDB("config").shards.find().sort({ _id: 1 }).forEach(function(conn) {
    var shard = new Mongo(conn.host);

    // Loose comparison is deliberate so that either 1 or true counts as success.
    if ((user && password) && shard.getDB("admin").auth(user, password) != 1) {
      // Best effort: keep going with the remaining shards, but do not hide the gap.
      print("shardedCollStats: authentication failed on shard " + conn._id + "; skipping");
      return;
    }

    var shardCollStats = shard.getDB(database).getCollection(collection)
      .aggregate({ $collStats: { storageStats: { scale: 1024 } } })
      .toArray()[0];
    if (!shardCollStats || !shardCollStats.storageStats) {
      print("shardedCollStats: no storage stats returned by shard " + conn._id + "; skipping");
      return;
    }

    var stats = shardCollStats.storageStats;
    stats["ns"] = shardCollStats.ns;
    stats["ok"] = 1;
    collStats["shards"][conn._id] = stats;

    collStats["count"] += stats["count"];
    collStats["size"] += stats["size"];
    collStats["storageSize"] += stats["storageSize"];
    collStats["totalIndexSize"] += stats["totalIndexSize"];
    // avgObjSize may be absent from a shard's stats (e.g. when it holds no documents);
    // treat that as a zero contribution instead of poisoning the total with NaN.
    weightedObjSizeTotal += (stats["avgObjSize"] || 0) * (stats["count"] || 0);
    collStats["indexSizes"] = appendIndexSizes(collStats["indexSizes"], stats["indexSizes"]);
  });

  // Weight each shard's average by its document count; guard against an empty collection
  // (or no reachable shards) so the result is 0 rather than NaN.
  collStats["avgObjSize"] = collStats["count"] > 0 ? weightedObjSizeTotal / collStats["count"] : 0;
  collStats["nindexes"] = Object.keys(collStats["indexSizes"]).length;
  // Chunk metadata is stored in the config database, not in the user database.
  // NOTE(review): this matches chunks by `ns`; confirm the config.chunks schema on the
  // server version in use.
  collStats["nchunks"] = db.getSiblingDB("config").chunks.count({ ns: namespace });
  return collStats;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment