Skip to content

Instantly share code, notes, and snippets.

@SphinxKnight
Created January 24, 2017 21:19
Show Gist options
  • Save SphinxKnight/d703f44ede302ccb2f396df96932c9e1 to your computer and use it in GitHub Desktop.
Save SphinxKnight/d703f44ede302ccb2f396df96932c9e1 to your computer and use it in GitHub Desktop.
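'''
Emulate the MDN "doc status" pages: for each documentation section, list the
pages whose translation is older than the English reference page.
'''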
import urllib.request
import json
from datetime import datetime
# Base URL of the en-US MDN "Web" documentation tree; a section name is
# appended to it to form the page URL.
PREFIX = 'https://developer.mozilla.org/en-US/docs/Web/'
# Suffix appended after the section name; the response is parsed as JSON and
# read for 'subpages' / 'translations' entries.
# NOTE(review): presumably '?expand' is what makes the API include the
# translation details - confirm against the MDN API.
SUFFIX = '$children?expand'
# strptime pattern applied to the first 19 characters of a 'last_edit' value.
DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
# Section names (relative to PREFIX) that the script reports on.
SECTIONS = ['JavaScript', 'HTML', 'CSS']
def get_delta(section, target_locale):
    """Return the outdated pages found in ``section`` and all of its subpages.

    A page is outdated when its reference ``last_edit`` timestamp is strictly
    later than the ``last_edit`` of its ``target_locale`` translation. A page
    with no translation in that locale is compared against itself and is
    therefore never reported.

    Args:
        section: Page description as a dict with ``'url'`` and ``'last_edit'``
            keys, and optional ``'translations'`` and ``'subpages'`` lists
            (a missing or ``None`` value is treated as empty).
        target_locale: Locale code matched against each translation's
            ``'locale'`` value.

    Returns:
        A list of dicts with the keys ``refURL``, ``refDate``, ``localeURL``
        and ``localeDate``. Entries for subpages come before the entry for
        the page itself.
    """
    ref_date = section['last_edit']
    ref_url = section['url']

    # Default to the reference values so that an untranslated page compares
    # equal to itself and is not flagged.
    locale_url = ref_url
    locale_date = ref_date
    for translation in section.get('translations') or ():
        if translation['locale'] == target_locale:
            locale_date = translation['last_edit']
            locale_url = translation['url']
            break

    # Depth-first: collect the outdated descendants before this page.
    list_pages = []
    for subpage in section.get('subpages') or ():
        list_pages.extend(get_delta(subpage, target_locale))

    # Some timestamps carry microseconds: keep only the first 19 characters
    # (YYYY-MM-DDTHH:MM:SS) so every value matches DATE_FORMAT.
    ref_time = datetime.strptime(ref_date[:19], DATE_FORMAT)
    locale_time = datetime.strptime(locale_date[:19], DATE_FORMAT)
    if ref_time > locale_time:
        list_pages.append({
            'refURL': ref_url,
            'refDate': ref_date,
            'localeURL': locale_url,
            'localeDate': locale_date,
        })
    return list_pages
def get_delta_section(section, target_locale='fr'):
    """Return the list of outdated pages for a given section.

    Downloads the JSON page tree located at ``PREFIX + section + SUFFIX`` and
    passes its root to :func:`get_delta`.

    Args:
        section: Section name appended to ``PREFIX`` (e.g. an item of
            ``SECTIONS``).
        target_locale: Locale whose translations are compared with the
            reference pages; defaults to ``'fr'``.

    Returns:
        The list produced by ``get_delta`` for the section's root page.

    Raises:
        urllib.error.URLError: If the request fails (raised by ``urlopen``).
        ValueError: If the response body is not valid JSON.
    """
    url = PREFIX + section + SUFFIX
    # Use the response as a context manager so the connection is closed even
    # if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        page_json = response.read().decode('utf-8')
    root_section = json.loads(page_json)
    return get_delta(root_section, target_locale)
# Report each section: its name on one line, then one line per outdated page.
for sect in SECTIONS:
    # Fetch before printing anything, so a failed request leaves no heading.
    delta = get_delta_section(sect)
    print(sect, *delta, sep='\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment