Skip to content

Instantly share code, notes, and snippets.

@escattone
Created February 21, 2020 00:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save escattone/c0b2748d3a8b135bbb93109552210bac to your computer and use it in GitHub Desktop.
Save escattone/c0b2748d3a8b135bbb93109552210bac to your computer and use it in GitHub Desktop.
Python code for checking that a document's slug is a sub-path of its parent's slug -- for all documents in all locales on MDN
from kuma.wiki.constants import EXPERIMENT_TITLE_PREFIX, LEGACY_MINDTOUCH_NAMESPACES, NOINDEX_SLUG_PREFIXES
def get_docs(locale):
    """Return the documents of *locale* that should be checked, as a list.

    Redirects, documents with empty ``html`` and documents without a current
    revision are left out, as are documents whose slug starts with the
    experiment prefix, with a legacy MindTouch namespace followed by ``:``,
    or with one of the no-index slug prefixes.

    Only ``id``, ``locale``, ``slug`` and ``parent_topic`` are requested for
    each document, the parent topic is fetched via ``select_related``, and
    the result is ordered by ``id``.
    """
    # Gather every slug prefix to exclude, in the order the filter is built.
    excluded_prefixes = [EXPERIMENT_TITLE_PREFIX]
    excluded_prefixes.extend(
        f"{namespace}:" for namespace in LEGACY_MINDTOUCH_NAMESPACES
    )
    excluded_prefixes.extend(NOINDEX_SLUG_PREFIXES)

    # OR the prefixes together into a single filter expression.
    unwanted = Q(slug__startswith=excluded_prefixes[0])
    for prefix in excluded_prefixes[1:]:
        unwanted |= Q(slug__startswith=prefix)

    documents = (
        Document.objects.filter(locale=locale, is_redirect=False)
        .exclude(html="")
        .only("id", "locale", "slug", "parent_topic")
        .select_related('parent_topic')
        .exclude(current_revision__isnull=True)
        .exclude(unwanted)
        .order_by("id")
    )
    return list(documents)
def check(chunk):
    """Find documents whose slug does not sit directly under their parent's slug.

    For every document in *chunk* that has a parent topic, the part of the
    document's slug before its last ``/`` is compared, case-insensitively,
    with the parent's slug.

    Args:
        chunk: iterable of objects exposing ``slug`` and ``parent_topic``
            (where ``parent_topic`` is falsy or exposes ``slug`` itself).

    Returns:
        A list of ``(doc.slug, parent.slug)`` tuples, one per mismatch, in
        the order the documents were given.
    """
    failures = []
    for doc in chunk:
        parent = doc.parent_topic
        if not parent:
            # Documents without a parent topic have nothing to be checked against.
            continue
        # Everything before the last "/" is the slug the parent is expected
        # to have; a slug without any "/" yields the empty string.
        expected_parent_slug = doc.slug.rpartition('/')[0]
        if expected_parent_slug.lower() != parent.slug.lower():
            failures.append((doc.slug, parent.slug))
    return failures
# Run the hierarchy check for every enabled locale and write the failures to
# a log file. The file is opened explicitly as UTF-8 so that slugs containing
# non-ASCII characters can always be written, whatever the platform's default
# encoding is.
with open('hierarchy_check_failures.log', 'w', encoding='utf-8') as log:
    for locale in settings.ENABLED_LOCALES:
        docs = get_docs(locale)
        failures = check(docs)
        header = f'locale = {locale}, {len(failures)} failures within {len(docs)} documents'
        # The summary line goes to stdout as well, as a progress indicator.
        print(header)
        # In the log, the summary is framed by dashed rules of the same width.
        dashes = '-' * len(header)
        print(dashes + '\n' + header + '\n' + dashes, file=log)
        for slug, parent_slug in failures:
            print(f' doc.slug = {slug}', file=log)
            print(f' doc.parent_topic.slug = {parent_slug}', file=log)
            print(' ---', file=log)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment