Skip to content

Instantly share code, notes, and snippets.

@escattone
Last active July 19, 2019 03:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save escattone/2f1a27a16ebd4233b0f1197a58eb9c0a to your computer and use it in GitHub Desktop.
Save escattone/2f1a27a16ebd4233b0f1197a58eb9c0a to your computer and use it in GitHub Desktop.
Re-render docs that use a macro (forked from https://gist.github.com/jwhitlock/43e34e07bef8c3f1863e91f076778ca6)
# From https://gist.github.com/jwhitlock/43e34e07bef8c3f1863e91f076778ca6
from time import sleep, time
import redis
from celery.states import READY_STATES
from django.conf import settings
from kuma.wiki.models import Document
from kuma.wiki.tasks import render_document
def null_notify_rerender_chunk(event, doc_id, task):
    """Ignore a render event (no-op notifier with the render-notifier signature)."""
    return None
# Cache of document ID -> full URL, filled lazily by
# verbose_notify_rerender_chunk so each document is fetched at most once.
doc_urls = {}


def verbose_notify_rerender_chunk(event, doc_id, task):
    """
    Print a render event together with the document's full URL.

    Arguments:
    event - Name of the event, printed as-is
    doc_id - ID of the Document the event refers to
    task - The render task; its ``state`` attribute is printed
    """
    try:
        doc_url = doc_urls[doc_id]
    except KeyError:
        # First event for this document: look it up once and remember the URL.
        doc_url = Document.objects.get(id=doc_id).get_full_url()
        doc_urls[doc_id] = doc_url
    print("Render %s (%s): %d %s" % (event, task.state, doc_id, doc_url))
def rerender_chunk(doc_ids, stuck_time=120, notifier_func=None):
    """
    Queue a set of documents to re-render, and wait until they are done.

    A task counts as done once its state is in celery's READY_STATES, so the
    "rendered" count says the task finished, not that it succeeded.

    Keyword Arguments:
    doc_ids - An iterable of document IDs to re-render (it is consumed once)
    stuck_time (120) - How many consecutive polling rounds (one second of
        sleep between rounds) may pass without any task finishing before
        the remaining tasks are reported as stuck.
    notifier_func (None) - A function called as
        notifier_func(event, doc_id, task) with event 'done' or 'stuck'.
        Defaults to a notifier that discards events.

    Return is a tuple of counts (documents rendered, documents unrendered)
    """
    if not notifier_func:
        notifier_func = null_notify_rerender_chunk

    # Materialize first so generators and other one-shot iterables work too.
    doc_ids = list(doc_ids)
    total = len(doc_ids)

    # Only unfinished tasks are tracked; finished ones are dropped right away.
    pending = [
        (doc_id, render_document.delay(doc_id, "no-cache", None, force=True,
                                       invalidate_cdn_cache=False))
        for doc_id in doc_ids
    ]

    stuck = 0
    while pending:
        still_pending = []
        for doc_id, task in pending:
            if task.state in READY_STATES:
                notifier_func('done', doc_id, task)
            else:
                still_pending.append((doc_id, task))

        # A round where nothing finished counts toward "stuck"; any progress
        # resets the counter.
        if len(still_pending) == len(pending):
            stuck += 1
        else:
            stuck = 0
        pending = still_pending

        if stuck >= stuck_time:
            for doc_id, task in pending:
                notifier_func('stuck', doc_id, task)
            return (total - len(pending), len(pending))

        if pending:
            sleep(1)

    return total, 0
def purgable_count():
    """
    Return the number of tasks in the purgable queue.

    The count is the length of the 'mdn_purgeable' list on the redis server
    named by settings.CELERY_BROKER_URL.

    Raises ValueError if the broker URL does not start with 'redis://'.
    """
    broker_url = settings.CELERY_BROKER_URL
    if not broker_url.startswith('redis://'):
        raise ValueError('Not redis broker: %s' % broker_url)
    return redis.from_url(broker_url).llen('mdn_purgeable')
def null_notify_wait_purgable(event, count, limit):
    """Ignore a purgable-queue event (no-op notifier)."""
    return None
def verbose_notify_wait_purgable(event, count, limit):
    """
    Print the purgable queue depth for an event.

    Arguments:
    event - Name of the event, printed as-is
    count - Current queue depth
    limit - Target queue depth
    """
    message = "Purgable queue %s: Target depth %d, Current depth %d" % (event, limit, count)
    print(message)
def wait_purgable(limit=1, notifier_func=None):
    """
    Wait for the purgable queue to drain down to ``limit`` entries.

    Keyword Arguments:
    limit (1) - Queue depth at or below which waiting stops; must be >= 0
    notifier_func (None) - A function called as
        notifier_func(event, count, limit) with event 'start', 'progress' or
        'not redis'. Defaults to a notifier that discards events.

    If the queue depth cannot be read because the broker is not redis, a
    'not redis' event is sent with a count of -1, and the function returns
    after a fixed five second pause.
    """
    assert limit >= 0
    notify = notifier_func or null_notify_wait_purgable

    try:
        depth = purgable_count()
    except ValueError:
        notify('not redis', -1, limit)
        sleep(5)
        return

    notify('start', depth, limit)
    # Re-check every 15 seconds until the queue is at or below the target.
    while depth > limit:
        sleep(15)
        depth = purgable_count()
        notify('progress', depth, limit)
def chunks(items, chunk_size):
    """Yield consecutive slices of items, each chunk_size long (the last may be shorter)."""
    starts = range(0, len(items), chunk_size)
    for begin in starts:
        end = begin + chunk_size
        yield items[begin:end]
def collect_doc_ids(docs, verbose=True, doc_filter=None):
    '''
    Collect the IDs of documents to rerender.

    Keyword arguments:
    docs - A queryset of Documents; IDs are read ordered by 'id'
    verbose - Print progress messages while filtering
    doc_filter - Optional callable taking a Document instance; only documents
        for which it returns a true value are kept

    Return: A list of document IDs
    '''
    candidate_ids = list(docs.order_by('id').values_list('id', flat=True))
    if not doc_filter:
        return candidate_ids[:]

    if verbose:
        print("Processing %d documents for relevant docs..." % len(candidate_ids))
    # Each document is loaded individually, by ID, to run the filter on it.
    kept_ids = [
        doc_id for doc_id in candidate_ids
        if doc_filter(Document.objects.get(id=doc_id))
    ]
    if verbose:
        print("%d of %d documents remain." % (len(kept_ids), len(candidate_ids)))
    return kept_ids
def error_count(doc_ids):
    '''
    Count documents with KumaScript rendering errors.

    Only documents whose ID is in doc_ids are considered; a document is
    counted when its rendered_errors field is not null.
    '''
    in_chunk = Document.objects.filter(id__in=doc_ids)
    with_errors = in_chunk.exclude(rendered_errors__isnull=True)
    return with_errors.count()
def rerender_slow(docs, verbose=True, limit=100, error_percent=10.0, doc_filter=None):
    '''Re-render a Document queryset a chunk at a time.

    After every chunk the purgable queue is allowed to drain, and the run is
    aborted early once the share of documents with KumaScript errors among
    those processed so far reaches error_percent.

    Keyword arguments:
    docs - A queryset of Documents
    verbose - Be verbose
    limit - How many to rerender at a time
    error_percent - A float in range (0.0, 100.0], to abort due to KS errors.
    doc_filter - A further filter of doc instances

    Return: A tuple:
    - Total number of docs rendered
    - Total number of docs unrendered (stuck)
    - Total number of docs with kumascript errors
    - Time in seconds it took to re-render slowly
    '''
    started = time()
    rerender_notify = verbose_notify_rerender_chunk if verbose else None
    wait_notify = verbose_notify_wait_purgable if verbose else None

    doc_ids = collect_doc_ids(docs, verbose, doc_filter)
    total = len(doc_ids)
    rendered = unrendered = errored = progress = 0

    wait_purgable(notifier_func=wait_notify)
    for chunk in chunks(doc_ids, limit):
        progress += len(chunk)
        if verbose:
            percent = 100.0 * float(progress) / float(total)
            print("*** Rendering %d of %d docs (%0.1f%%)"
                  % (progress, total, percent))

        done, stuck = rerender_chunk(chunk, notifier_func=rerender_notify)
        rendered += done
        unrendered += stuck

        # Wait for purgable queue to clear
        wait_purgable(notifier_func=wait_notify)

        # Count errors
        chunk_errors = error_count(chunk)
        if verbose and chunk_errors:
            print("%d errored documents in last chunk." % chunk_errors)
        errored += chunk_errors

        # Abort once the errors reach error_percent of the docs processed so far.
        if errored >= progress * error_percent / 100.0:
            if verbose:
                print("%d of %d documents have errors, aborting."
                      % (errored, progress))
            break

    return rendered, unrendered, errored, time() - started
def macro_docs_and_filter(macro_name):
    '''
    Build the candidate queryset and exact filter for users of one macro.

    Return: A tuple:
    - Documents whose html contains macro_name (case-insensitive substring)
    - A function taking a Document that returns True when macro_name matches,
      ignoring case, one of the names from doc.extract.macro_names()
    '''
    def macro_filter(doc):
        wanted = macro_name.lower()
        return any(name.lower() == wanted for name in doc.extract.macro_names())

    candidates = Document.objects.filter(html__icontains=macro_name.lower())
    return candidates, macro_filter
def rerender_macro_users(macro_name, verbose=True):
    '''
    Slowly re-render the documents that use the macro macro_name.

    Return: The tuple returned by rerender_slow.
    '''
    candidates, uses_macro = macro_docs_and_filter(macro_name)
    return rerender_slow(candidates, verbose=verbose, doc_filter=uses_macro)
def macro_list_docs_and_filter(macro_names):
    '''
    Build the candidate queryset and exact filter for users of several macros.

    macro_names must contain at least two names.

    Return: A tuple:
    - Documents whose html contains any of the names (case-insensitive
      substring)
    - A function taking a Document that returns True when any of the names
      matches, ignoring case, a name from doc.extract.macro_names()
    '''
    assert len(macro_names) > 1
    wanted = [name.lower() for name in macro_names]

    def macros_filter(doc):
        used = [name.lower() for name in doc.extract.macro_names()]
        for macro in wanted:
            if macro in used:
                return True
        return False

    # OR together one html-substring query per macro name.
    first, rest = wanted[0], wanted[1:]
    candidates = Document.objects.filter(html__icontains=first)
    for name in rest:
        candidates = candidates | Document.objects.filter(html__icontains=name)
    return candidates, macros_filter
def rerender_users_of_macro_list(macro_names, verbose=True):
    '''
    Slowly re-render the documents that use any macro in macro_names.

    Return: The tuple returned by rerender_slow.
    '''
    candidates, uses_any_macro = macro_list_docs_and_filter(macro_names)
    return rerender_slow(candidates, verbose=verbose, doc_filter=uses_any_macro)
# Single macro version
# The assignment must be live: the call below reads ``macro``, and running
# the script with it commented out fails with a NameError.
macro = 'CertifiedBadge'
rendered, unrendered, errored, seconds = rerender_macro_users(macro)

# Multiple macro version (see https://github.com/mdn/kumascript/pull/789).
# To use it, comment out the two single-macro lines above and uncomment these:
# macros = ['APIRef', 'AddonSidebar', 'CSSRef', 'CanvasSidebar', 'DefaultAPISidebar', 'DocStatusQuickLinks', 'FirefoxOSAPIRef', 'FirefoxOSSidebar', 'FirefoxSidebar', 'GamesSidebar', 'HTMLMainQuickLinks', 'HTMLRef', 'HTMLSidebar', 'HTTPSidebar', 'JSSidebar', 'LearnSidebar', 'MDNSidebar', 'SVGRef', 'ServiceWorkerSidebar', 'SpiderMonkeySidebar', 'ToolsSidebar', 'WebAssemblySidebar', 'WebGLSidebar', 'WebRTCSidebar', 'eventref', 'jsctypesSidebar', 'nsprapiref']
# rendered, unrendered, errored, seconds = rerender_users_of_macro_list(macros)

print("Rendered %d docs, %d left unrendered, %d errored, in %d seconds." % (rendered, unrendered, errored, seconds))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment