Skip to content

Instantly share code, notes, and snippets.

@ZwodahS
Last active July 4, 2017 12:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZwodahS/d268aeb6f0163a95a4cf1bcc52b1df62 to your computer and use it in GitHub Desktop.
Save ZwodahS/d268aeb6f0163a95a4cf1bcc52b1df62 to your computer and use it in GitHub Desktop.
Batch cursor for motor
import motor
import tornado.gen
import tornado.ioloop
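"""
Works around cursor timeouts that can happen while looping over a query,
by reading the results in successive skip/limit batches.
Only use this if you are absolutely sure that the data does not change while
you are iterating: each batch is a new query, so documents inserted or removed
in between would shift the skip offsets and cause items to be repeated or missed.
"""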
class BatchCursor(object):
    """Cursor-like wrapper that reads a query in skip/limit batches.

    Rather than keeping a single cursor open for the whole iteration, a new
    Motor cursor is opened for every batch of ``buffered_limit`` documents.
    It exposes the same ``fetch_next`` / ``next_object`` / ``sort`` calls the
    demo code uses on a plain Motor cursor.

    NOTE(review): because every batch is an independent query, results are
    only consistent if the collection does not change during iteration and
    the ordering is stable (presumably a sort on a unique key) -- confirm
    for your use case.
    """

    def __init__(self, batch_collection, query, sort=None, buffered_limit=1000):
        """Store the query parameters; no query is issued until ``fetch_next``.

        Args:
            batch_collection: Object exposing ``motor_collection`` (the
                collection the batches are read from).
            query: Filter document passed to ``find`` for every batch.
            sort: Optional sort specification applied to every batch.
            buffered_limit: Number of documents requested per batch.
        """
        self.batch_collection = batch_collection
        self.query = query
        self.skip = 0  # offset of the next batch to request
        self._sort = sort
        self.limit = buffered_limit
        self.cursor = None  # cursor of the current batch, None before first use

    @property
    @tornado.gen.coroutine
    def fetch_next(self):
        """Future resolving to whether another document is available.

        When the current batch is exhausted (or no batch was opened yet),
        the next batch is requested and its first ``fetch_next`` result is
        returned instead.
        """
        has_next = False
        if self.cursor is not None:
            has_next = yield self.cursor.fetch_next
        if not has_next:
            cursor = self.batch_collection.motor_collection.find(self.query)
            # Test the stored specification, not ``self.sort`` (the bound
            # method, which is always truthy and would pass None to sort()).
            if self._sort:
                cursor = cursor.sort(self._sort)
            self.cursor = cursor.skip(self.skip).limit(self.limit)
            self.skip += self.limit
            has_next = yield self.cursor.fetch_next
        return has_next

    def next_object(self):
        """Return the next document of the current batch, or None if no batch is open."""
        return self.cursor.next_object() if self.cursor is not None else None

    def sort(self, sort):
        """Set the sort specification used for subsequent batches; returns self for chaining."""
        self._sort = sort
        return self
class BatchCollection(object):
    """Thin wrapper around a Motor collection whose ``find`` returns a BatchCursor."""

    def __init__(self, motor_collection):
        """Remember the wrapped collection.

        Args:
            motor_collection: The collection that batches will be read from.
        """
        self.motor_collection = motor_collection

    def find(self, query=None, sort=None, buffered_limit=1000):
        """Create a BatchCursor over this collection.

        Args:
            query: Filter document; ``None`` (or any falsy value) means
                "match everything" and is replaced by an empty dict.
            sort: Optional sort specification forwarded to the cursor.
            buffered_limit: Batch size forwarded to the cursor, so callers
                can tune it without constructing BatchCursor themselves.

        Returns:
            A new BatchCursor bound to this collection.
        """
        query = query or {}
        return BatchCursor(self, query, sort=sort, buffered_limit=buffered_limit)
@tornado.gen.coroutine
def _collect_ids(cursor, threshold):
    """Collect ``_id`` values from *cursor* until more than *threshold* are gathered.

    Works with any object offering ``fetch_next`` and ``next_object``.
    Stops early when the cursor runs out of documents.
    """
    ids = []
    while (yield cursor.fetch_next):
        ids.append(cursor.next_object()["_id"])
        if len(ids) > threshold:
            break
    return ids


@tornado.gen.coroutine
def run():
    """Demo: check that BatchCursor yields the same ids as a plain Motor cursor.

    Reads the ``books`` collection of the ``library`` database on localhost
    twice, newest ``_id`` first -- once through BatchCollection and once
    directly -- and prints whether both id sequences are identical.
    """
    database = motor.MotorClient("mongodb://localhost/library").get_default_database()

    # Stop after more than 1000 ids: with the default batch size of 1000 this
    # forces the batched cursor to cross at least one batch boundary.
    batched_cursor = BatchCollection(database["books"]).find().sort((("_id", -1), ))
    batched_ids = yield _collect_ids(batched_cursor, 1000)

    plain_cursor = database["books"].find().sort((("_id", -1), ))
    plain_ids = yield _collect_ids(plain_cursor, 1000)

    # Compare the lists directly: a zip-based element comparison would
    # silently ignore a difference in length.
    print(batched_ids == plain_ids)
if __name__ == "__main__":
    # run_sync starts the loop, waits for run() to complete, re-raises any
    # exception it produced, and stops the loop so the script terminates
    # (a bare run() followed by start() hides errors and never exits).
    tornado.ioloop.IOLoop.current().run_sync(run)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment