Skip to content

Instantly share code, notes, and snippets.

@DamianMullins
Last active January 5, 2023 14:40
Show Gist options
  • Save DamianMullins/af6d32cd5830597dba4fee7ad911377c to your computer and use it in GitHub Desktop.
Save DamianMullins/af6d32cd5830597dba4fee7ad911377c to your computer and use it in GitHub Desktop.
Clean MongoDB nested array duplicated records
const assert = require('assert');
const { MongoClient } = require('mongodb');
MongoClient.connect('mongodb://127.0.0.1:27017/', (err, client) => {
assert.equal(null, err);
console.log('Connected correctly to server\n');
/**
 * Builds the aggregation pipeline that locates the surplus copies of repeated
 * entries inside the array field `arrayName`.
 *
 * Every result has the shape `{ _id, [arrayName]: [...] }`: `_id` is the
 * `_id` of the source document and the array holds all copies of ONE repeated
 * entry except the first, i.e. exactly the copies that should be deleted.
 *
 * @param {string} arrayName - Name of the array field to inspect.
 * @returns {object[]} The aggregation pipeline stages.
 */
function buildDuplicatesPipeline(arrayName) {
  const fieldPath = `$${arrayName}`;

  return [
    // To limit the clean-up to a single source while testing, prepend e.g.:
    // { $match: { 'externalSource.id': '63a1f9027c2a697cc587da8d' } },

    // One output document per array entry.
    { $unwind: { path: fieldPath } },

    // Group identical entries of the same document and collect every copy.
    {
      $group: {
        _id: {
          _id: '$_id',
          [arrayName]: fieldPath,
        },
        count: { $sum: 1 },
        [arrayName]: { $push: fieldPath },
      },
    },

    // Keep only entries that occur more than once.
    { $match: { count: { $ne: 1 } } },

    // Drop the first copy (the one to keep); what remains are the duplicates.
    // `count` is at least 2 here, so the slice length is always positive.
    {
      $project: {
        _id: '$_id._id',
        [arrayName]: {
          $slice: [fieldPath, 1, { $subtract: [{ $size: fieldPath }, 1] }],
        },
      },
    },
  ];
}

/**
 * Removes duplicated entries from the array field `arrayName` of the
 * documents in the `orders` collection of the `data-integrator` database,
 * keeping one copy of each entry.
 *
 * All writes are queued on a single ordered bulk operation and executed only
 * when at least one operation was queued. Relies on the connected `client`
 * from the enclosing scope.
 *
 * @param {string} arrayName - Name of the array field to de-duplicate.
 * @returns {Promise<void>} Resolves once the bulk operation (if any) has run;
 *   rejects if the aggregation or the bulk execution fails.
 */
async function run(arrayName) {
  const database = client.db('data-integrator');
  const orders = database.collection('orders');
  const cursor = orders.aggregate(buildDuplicatesPipeline(arrayName));
  const batch = orders.initializeOrderedBulkOp();

  // Iterate the cursor with `for await` so a failure while reading results
  // rejects this function instead of being lost inside a forEach callback.
  for await (const doc of cursor) {
    for (const dup of doc[arrayName]) {
      // `$unset` through the positional operator turns ONE matching element
      // into null; each surplus copy therefore gets its own operation.
      // NOTE(review): `$elemMatch` takes a query document, so this presumably
      // assumes the array entries are embedded documents, and it may also
      // match an entry that merely contains the same fields - confirm.
      batch
        .find({
          _id: doc._id,
          [arrayName]: { $elemMatch: dup },
        })
        .updateOne({
          $unset: { [`${arrayName}.$`]: '' },
        });
    }

    // The operations are ordered, so a single pull after all unsets removes
    // every null left behind for this result.
    // NOTE(review): this also removes nulls that were already in the array.
    batch.find({ _id: doc._id }).updateOne({
      $pull: { [arrayName]: null },
    });
  }

  // `batch.length` is the number of queued write operations.
  console.log(`${batch.length} records found for ${arrayName}\n`);

  if (batch.length > 0) {
    await batch.execute();
    console.log(JSON.stringify(batch.batches, null, 2));
  }
}
// Clean the two array fields one after the other. `.then` must receive a
// callback: calling `run(...)` inline would start the second clean-up
// immediately and let the connection be closed while it is still in use.
// Errors are printed, and the connection is released on success and failure.
run('courierHistory')
  .then(() => run('statusHistory'))
  .catch(console.dir)
  .finally(() => client.close());
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment