Skip to content

Instantly share code, notes, and snippets.

@Colum
Created January 22, 2024 19:10
Show Gist options
  • Save Colum/90fa9c91b417bdfabc9c04e81a6a0f1a to your computer and use it in GitHub Desktop.
Save Colum/90fa9c91b417bdfabc9c04e81a6a0f1a to your computer and use it in GitHub Desktop.
Delete documents with duplicate keys, keeping one of each
# Finds documents from the "users" collection which have duplicate values for
# key "kazoo_id", and deletes as many as needed to keep only 1 document.
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
uri = "..."  # presumably a placeholder; supply the real connection string before running
client = MongoClient(uri, server_api=ServerApi('1'))
deleted = 0  # running total of documents actually removed
try:
    db = client["cs_demo_prod"]
    collection = db["users"]
    # Group documents by kazoo_id and keep only the groups that contain more
    # than one document. Groups whose key is null are left untouched.
    result = collection.aggregate([
        {"$group": {
            "_id": "$kazoo_id",
            "count": {"$sum": 1},
        }},
        {"$match": {
            "count": {"$gt": 1},
            "_id": {"$ne": None},
        }},
    ])
    for doc in result:
        kazoo_id = doc["_id"]
        # Remove all but one document of the group. Deleting `count`
        # documents (as opposed to `count - 1`) would wipe out every copy.
        to_delete = doc["count"] - 1
        for _ in range(to_delete):
            outcome = collection.delete_one({"kazoo_id": kazoo_id})
            # Count what the server reports as removed, not the attempts.
            deleted += outcome.deleted_count
except Exception as e:
    # Best-effort script: report the failure and still print the tally below.
    print(e)
finally:
    # Release the connection pool whether or not the cleanup succeeded.
    client.close()
print(f"Deleted {deleted} records")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment