Skip to content

Instantly share code, notes, and snippets.

@zhangpengGenedock
Created July 7, 2017 03:12
Show Gist options
  • Save zhangpengGenedock/cd26b407ebb5d4e9076705f542214b3b to your computer and use it in GitHub Desktop.
Save zhangpengGenedock/cd26b407ebb5d4e9076705f542214b3b to your computer and use it in GitHub Desktop.
MongoDB: create a multi-field unique index and remove duplicates
/**
 * Remove duplicate documents from a collection, keeping one document per
 * distinct combination of the given key fields.
 *
 * Documents are grouped on `keyFields` with an aggregation; for every group
 * that holds more than one document, the first `_id` collected by `$push` is
 * kept and all the others are queued for removal through ordered bulk
 * operations, which are flushed every `batchSize` groups.
 *
 * @param {Object} collection - Collection handle exposing `aggregate()` and
 *     `initializeOrderedBulkOp()` (e.g. `db.testkdd` in the mongo shell).
 * @param {string[]} keyFields - Names of the fields that together make a
 *     document "unique". Must list at least one field.
 *     NOTE(review): names are used verbatim as `$group` `_id` keys, so they
 *     are presumably expected to be plain top-level field names - confirm
 *     before passing dotted paths.
 * @param {number} [batchSize=1000] - Number of duplicate groups queued before
 *     the bulk operation is executed and re-initialised.
 * @returns {{groups: number, removed: number}} `groups` is the number of
 *     duplicate groups processed; `removed` is the number of `_id`s queued
 *     for removal.
 * @throws {Error} If `keyFields` is not a non-empty array or `batchSize` is
 *     not a positive number.
 */
function removeDuplicates(collection, keyFields, batchSize) {
    // An empty key list would put every document into one single group and
    // delete all but one of them, so refuse it outright.
    if (!Array.isArray(keyFields) || keyFields.length === 0) {
        throw new Error("removeDuplicates: keyFields must be a non-empty array of field names");
    }

    var flushEvery = batchSize === undefined ? 1000 : batchSize;
    if (typeof flushEvery !== "number" || !(flushEvery >= 1)) {
        throw new Error("removeDuplicates: batchSize must be a positive number");
    }

    // Build the `$group` key: { field: "$field", ... } in the order given.
    var groupKey = {};
    keyFields.forEach(function (field) {
        groupKey[field] = "$" + field;
    });

    var pipeline = [
        { "$group": {
            "_id": groupKey,
            "ids": { "$push": "$_id" },
            "count": { "$sum": 1 }
        }},
        // Only groups with more than one member contain duplicates.
        { "$match": { "count": { "$gt": 1 } } }
    ];

    var bulk = collection.initializeOrderedBulkOp();
    var pending = 0; // operations queued in the current bulk, not yet executed
    var groups = 0;
    var removed = 0;

    collection.aggregate(pipeline, { "allowDiskUse": true }).forEach(function (doc) {
        // Keep the first matching document, remove every other one.
        var extras = doc.ids.slice(1);
        bulk.find({ "_id": { "$in": extras } }).remove();

        groups += 1;
        removed += extras.length;
        pending += 1;

        // Flush once per `flushEvery` groups and start a fresh bulk operation.
        if (pending === flushEvery) {
            bulk.execute();
            bulk = collection.initializeOrderedBulkOp();
            pending = 0;
        }
    });

    // Flush the remainder; skipped when nothing is queued so that an empty
    // bulk operation is never executed.
    if (pending > 0) {
        bulk.execute();
    }

    return { groups: groups, removed: removed };
}

// List "all" fields that make a document "unique" in the key-field array;
// only some are listed here for example purposes.
// `db` is the global provided by the mongo shell; the guard lets this file be
// loaded outside the shell without running the removal.
if (typeof db !== "undefined") {
    removeDuplicates(db.testkdd, ["duration", "protocol_type", "service", "flag"], 1000);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment