Created
January 22, 2024 19:10
-
-
Save Colum/90fa9c91b417bdfabc9c04e81a6a0f1a to your computer and use it in GitHub Desktop.
Delete documents with duplicate keys keeping 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Finds documents from the "users" collection which have duplicate values for
# key "kazoo_id", and deletes as many as needed to keep only 1 document per
# distinct "kazoo_id" value. Groups whose "kazoo_id" is null are left alone.
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

uri = "..."
client = MongoClient(uri, server_api=ServerApi('1'))
deleted = 0
try:
    db = client["cs_demo_prod"]
    collection = db["users"]

    # Count documents per kazoo_id and keep only the groups that really are
    # duplicated (count > 1), skipping the group whose key is null.
    result = collection.aggregate([
        {"$group": {
            "_id": "$kazoo_id",
            "count": {"$sum": 1},
        }},
        {"$match": {
            "count": {"$gt": 1},
            "_id": {"$ne": None},
        }},
    ])

    for doc in result:
        kazoo_id = doc["_id"]
        # Delete all but one: a group of N duplicates needs N - 1 deletions.
        # Using N here would wipe out every document with this kazoo_id.
        to_delete = doc["count"] - 1
        for _ in range(to_delete):
            outcome = collection.delete_one({"kazoo_id": kazoo_id})
            # Count what the server actually removed, so the final report
            # stays accurate even if a document vanished in the meantime.
            deleted += outcome.deleted_count
except Exception as e:
    # Best-effort script: report the failure and still print the tally of
    # deletions that completed before the error.
    print(e)
finally:
    # Release the connection pool whether or not the run succeeded.
    client.close()
print("Deleted " + str(deleted) + " records")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment