Skip to content

Instantly share code, notes, and snippets.

@gruzovator
Last active December 17, 2016 10:27
Show Gist options
  • Save gruzovator/6afb687b8d854c5ac916b58968f683ac to your computer and use it in GitHub Desktop.
Save gruzovator/6afb687b8d854c5ac916b58968f683ac to your computer and use it in GitHub Desktop.
pagination for periodically updated collection of sorted docs
# coding: utf-8
"""
Pagination model for sorted docs collection, that is periodically updated.
The key ideas:
- all docs have sequential id
- docs are sorted by composite key (weight, sequential id)
- response to page request containse 'next_offset' (a composite key) that should be used to get next page
"""
import bisect
import time
from collections import namedtuple
from random import randint
from threading import Lock
from threading import Thread
class Doc(namedtuple('Doc', 'seq_id, weight')): # every doc has sequential id
def __repr__(self):
return '{w:%d, s:%d}' % (self.weight, self.seq_id)
def docs_source(docs_collection):
"""Docs source model
Should be run in thread. Generates docs with sequential id and weight attributes.
"""
seq_id = 0
while True:
seq_id += 1
# weight model (importance + freshness)
weight = randint(0, 10) # doc content weight emulation
weight += seq_id # time component of weight, recent docs are more valuable
docs_collection.add(Doc(seq_id, weight))
time.sleep(randint(0, 5))
class DocsCollection(object):
def __init__(self, maxsize=6):
self.sorted_keys = []
self.docs = {}
self.maxsize = maxsize
self.__mtx = Lock()
def add(self, doc):
with self.__mtx:
key = (doc.weight, doc.seq_id)
assert key not in self.docs # keys are unique cause have a seq_id part
bisect.insort(self.sorted_keys, key)
self.docs[key] = doc
if len(self.sorted_keys) > self.maxsize:
key = self.sorted_keys.pop(0)
del self.docs[key]
def page(self, offset_key=None, limit=2):
""" Page request
:param offset_key: None for first page
:param limit: page docs number
:return: tuple of page docs, next offset key and all docs (to view current state)
"""
with self.__mtx:
page_docs = []
next_offset = None
pos = bisect.bisect_left(self.sorted_keys, offset_key) if offset_key else 0
try:
for i in range(pos, pos + limit):
page_docs.append(self.docs[self.sorted_keys[i]])
except IndexError:
pass
try:
next_offset = self.sorted_keys[pos + limit]
except IndexError:
pass
all_docs = [self.docs[k] for k in self.sorted_keys]
return (page_docs, next_offset, all_docs)
if __name__ == '__main__':
docs = DocsCollection(6)
src_thread = Thread(target=docs_source, args=[docs])
src_thread.daemon = True
src_thread.start()
# User reads docs
while True:
print
page_n = 0
next_offset = None
while True:
page_docs, next_offset, all_docs = docs.page(next_offset)
page_n += 1
print 'User reads docs on page', page_n
print '<ALL DOCS: %s>' % all_docs
print '<PAGE: %s>' % page_docs
print '<NEXT OFFSET: %s>' % str(next_offset)
print 'reading ...'
time.sleep(5)
if next_offset is None:
print 'No more docs. User press F5 or leaves the site'
break
else:
'User press "Next page"'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment