Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save zhangpengGenedock/7218936973365f46007a370b03cd1b4d to your computer and use it in GitHub Desktop.
Save zhangpengGenedock/7218936973365f46007a370b03cd1b4d to your computer and use it in GitHub Desktop.
Delete duplicate documents from the gddata_daily_stats collection
// Remove duplicate documents from gddata_daily_stats. Documents are grouped
// on (account_name, date); within every group that has more than one member
// the first collected _id is kept and the remaining ones are queued for
// removal through a bulk operation.
// NOTE(review): `db` is not defined in this script; presumably it is the
// database handle supplied by the MongoDB shell.
var bulk = db.gddata_daily_stats.initializeOrderedBulkOp();
var count = 0;

db.gddata_daily_stats.aggregate(
  [
    {
      // The `_id` key below defines what counts as "the same" document;
      // extend it with every field that contributes to uniqueness.
      "$group": {
        "_id": {
          "account_name": "$account_name",
          "date": "$date"
        },
        "ids": { "$push": "$_id" },
        "count": { "$sum": 1 }
      }
    },
    // Keep only the groups that actually contain duplicates.
    { "$match": { "count": { "$gt": 1 } } }
  ],
  { "allowDiskUse": true }
).forEach(function (group) {
  // Every _id after the first one in the group is a duplicate to delete.
  var duplicateIds = group.ids.slice(1);
  bulk.find({ "_id": { "$in": duplicateIds } }).remove();
  count += 1;

  // Flush once per 1000 queued operations, then start a fresh batch.
  if (count % 1000 === 0) {
    bulk.execute();
    bulk = db.gddata_daily_stats.initializeOrderedBulkOp();
  }
});

// Flush the trailing partial batch; skipped when nothing is pending
// (count is zero or an exact multiple of 1000).
if (count % 1000 !== 0) {
  bulk.execute();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment