Skip to content

Instantly share code, notes, and snippets.

@Dartv
Created October 1, 2015 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dartv/96a0ae477b48d6af1c3f to your computer and use it in GitHub Desktop.
Save Dartv/96a0ae477b48d6af1c3f to your computer and use it in GitHub Desktop.
Remove duplicate documents, keeping only the one with the highest difficultyrating
// Inspection query: list every beatmapset_id that appears in more than one
// document, together with the number of documents, their _ids and the highest
// difficultyrating found in the group. Groups with the most documents come first.
db.beatmaps.aggregate([
    // One result row per beatmapset_id.
    {
        $group: {
            _id: "$beatmapset_id",
            count: { $sum: 1 },
            uniqueIds: { $addToSet: "$_id" },
            maxRating: { $max: "$difficultyrating" }
        }
    },
    // Keep only the ids that are actually duplicated.
    { $match: { count: { $gte: 2 } } },
    // Largest groups first.
    { $sort: { count: -1 } }
]);
// Removes lower-rated duplicates from db.beatmaps: for every beatmapset_id that
// occurs in two or more documents, each document of that group whose
// difficultyrating is below the group's maximum is queued for removal. Removals
// are sent to the server in batches of `batchSize` queued operations.
//
// NOTE(review): documents that tie for the maximum difficultyrating are all
// kept (the filter is a strict "$lt"), so a group may still contain more than
// one document after this script has run.
var pipeline = [
        // One result row per beatmapset_id, carrying the member _ids and the
        // highest difficultyrating of the group.
        {
            "$group": {
                "_id": "$beatmapset_id",
                "count": { "$sum": 1 },
                "uniqueIds": { "$addToSet": "$_id" },
                "maxRating": { "$max": "$difficultyrating" }
            }
        },
        // Only ids that are actually duplicated.
        {
            "$match": {
                "count": { "$gte": 2 }
            }
        },
        {
            "$sort": { "count": -1 }
        }
    ],
    // Number of queued remove operations per bulk.execute() round trip.
    batchSize = 500,
    counter = 0,
    bulk = db.beatmaps.initializeOrderedBulkOp();

db.beatmaps.aggregate(pipeline).forEach(function (doc) {
    // Queue removal of every group member rated below the group's maximum.
    bulk.find({
        "_id": { "$in": doc.uniqueIds },
        "difficultyrating": { "$lt": doc.maxRating }
    }).remove();
    counter++;

    if (counter % batchSize === 0) {
        // Flush a full batch and start a fresh bulk operation.
        bulk.execute();
        bulk = db.beatmaps.initializeOrderedBulkOp();
    }
});

// Flush the final partial batch. When counter is 0 or an exact multiple of
// batchSize nothing is pending (the last batch was already executed inside the
// loop), so the empty bulk is not executed.
if (counter % batchSize !== 0) {
    bulk.execute();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment