Last active
September 19, 2022 03:32
-
-
Save messa/4407772e87c61e193b3bf2a777a6e0e0 to your computer and use it in GitHub Desktop.
MongoDB pymongo bulk update all documents in a collection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Demo: migrate every document of a MongoDB collection in _id-ordered batches.

The script fills ``test.dataset`` with sample documents and then renames
the field ``foo`` to ``foofoo`` in all of them.  Documents are read in
batches ordered by ``_id`` and each batch is written back with a single
unordered bulk write, so the whole collection is never held in memory and
the number of round trips stays small.

Requires PyMongo >= 3.7 (``count_documents``, ``estimated_document_count``);
it also runs on PyMongo 4, where ``Collection.count`` and
``initialize_unordered_bulk_op`` no longer exist.
"""

from bson import MinKey
import pymongo
from pymongo import ASCENDING
from pymongo import UpdateOne

# Number of sample batches to insert and the size of every batch
# (also used as the page size of the migration loop).
INSERT_BATCHES = 500
BATCH_SIZE = 1000

client = pymongo.MongoClient()
db = client['test']
collection = db['dataset']

# only for testing - of course do not do drop() in production
collection.drop()

# First, insert some documents
for _ in range(INSERT_BATCHES):
    docs = [{'foo': 'barbar'} for _ in range(BATCH_SIZE)]
    collection.insert_many(docs)
    # Progress output only: the metadata-based estimate avoids scanning the
    # growing collection after every single batch.
    print('Inserted {} docs, collection has now {} docs'.format(
        len(docs), collection.estimated_document_count()))

# Exact count; Collection.count() was removed in PyMongo 4.
print('Collection contains {} documents'.format(collection.count_documents({})))
print('First 10 documents:', list(collection.find(limit=10)))

# Now, let's update the schema - for example rename 'foo' to 'foofoo'.
# MinKey() sorts before every other BSON value, so the first query matches
# the collection from its very first _id.
last_id = MinKey()
while True:
    query = {'_id': {'$gt': last_id}}
    docs = list(collection.find(query, sort=[('_id', ASCENDING)], limit=BATCH_SIZE))
    if not docs:
        print('All documents have been migrated')
        break
    print('Selected {} documents with _id > {} to migrate'.format(len(docs), last_id))
    # Test for key presence rather than truthiness, so that documents whose
    # 'foo' is '', 0, None or [] are renamed as well.
    requests = [
        UpdateOne({'_id': doc['_id']}, {'$rename': {'foo': 'foofoo'}})
        for doc in docs
        if 'foo' in doc
    ]
    # bulk_write() raises InvalidOperation on an empty request list, which
    # happens when a batch is already migrated (e.g. the script is re-run).
    if requests:
        collection.bulk_write(requests, ordered=False)
    # Resume after the last _id of this batch (keyset pagination).
    last_id = docs[-1]['_id']

print('First 10 documents:', list(collection.find(limit=10)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./demo.py | |
Inserted 1000 docs, collection has now 1000 docs | |
Inserted 1000 docs, collection has now 2000 docs | |
Inserted 1000 docs, collection has now 3000 docs | |
... | |
Inserted 1000 docs, collection has now 498000 docs | |
Inserted 1000 docs, collection has now 499000 docs | |
Inserted 1000 docs, collection has now 500000 docs | |
Collection contains 500000 documents | |
First 10 documents: [{'_id': ObjectId('5866d916b8de570ae5d95206'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95207'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95208'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95209'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520a'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520b'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520c'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520d'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520e'), 'foo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520f'), 'foo': 'barbar'}] | |
Selected 1000 documents with _id > MinKey() to migrate | |
Selected 1000 documents with _id > 5866d916b8de570ae5d955ed to migrate | |
Selected 1000 documents with _id > 5866d916b8de570ae5d959d5 to migrate | |
Selected 1000 documents with _id > 5866d916b8de570ae5d95dbd to migrate | |
Selected 1000 documents with _id > 5866d916b8de570ae5d961a5 to migrate | |
... | |
Selected 1000 documents with _id > 5866d921b8de570ae5e0e76d to migrate | |
Selected 1000 documents with _id > 5866d921b8de570ae5e0eb55 to migrate | |
Selected 1000 documents with _id > 5866d921b8de570ae5e0ef3d to migrate | |
All documents have been migrated | |
First 10 documents: [{'_id': ObjectId('5866d916b8de570ae5d95206'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95207'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95208'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d95209'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520a'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520b'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520c'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520d'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520e'), 'foofoo': 'barbar'}, {'_id': ObjectId('5866d916b8de570ae5d9520f'), 'foofoo': 'barbar'}] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment