Skip to content

Instantly share code, notes, and snippets.

@jpmckinney
Last active November 30, 2019 05:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpmckinney/ba38d531e245c175e254ea3bc4218001 to your computer and use it in GitHub Desktop.
Save jpmckinney/ba38d531e245c175e254ea3bc4218001 to your computer and use it in GitHub Desktop.
Compare differences after making changes to Sphinx

Written as part of open-contracting/standard_profile_template#33

curl -O https://gist.githubusercontent.com/jpmckinney/ba38d531e245c175e254ea3bc4218001/raw/bbb015f613ba936b3b42f25c576e833d196e5181/diff-reduce.py
python diff-reduce.py
diff -rq build t | grep Only | sort
diff -ru -x '*.js' -x '.buildinfo' -x '_sources' build t | less
import os.path
import re
from glob import glob
def replace(basedir, replacements):
    """Apply regex substitutions, in place, to every ``*.html`` file under *basedir*.

    Args:
        basedir: Directory to search. Files directly inside it and in any
            nested subdirectory are processed (recursive ``**`` glob).
        replacements: Mapping of regular-expression pattern to replacement
            template (``re.sub`` syntax, so ``\\1`` backreferences work).
            Entries are applied in the mapping's iteration order, and each
            substitution sees the output of the previous ones.

    Files are read and written as UTF-8 rather than the platform default
    encoding, because the replacement text may contain non-ASCII characters.
    A file is only rewritten when a substitution actually changed it.
    """
    # Compile once up front instead of once per file. MULTILINE makes ``^``
    # and ``$`` match at every line boundary, not only at the ends of the file.
    compiled = [
        (re.compile(pattern, flags=re.MULTILINE), replacement)
        for pattern, replacement in replacements.items()
    ]

    for filename in glob(os.path.join(basedir, '**', '*.html'), recursive=True):
        with open(filename, encoding='utf-8') as f:
            original = f.read()

        content = original
        for regex, replacement in compiled:
            content = regex.sub(replacement, content)

        # Leave untouched files alone (no needless write or mtime change).
        if content != original:
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content)
# Rewrite the HTML files under 'build' so they diff more cleanly against 't'.
# replace() applies the entries in the order written and each one sees the
# output of those before it, so the order of this table matters (the
# "Cleanup" entries in particular rely on running after the rules above them).
replace('build', {
    # Three literal dots become a single ellipsis character.
    r'\.\.\.': '…',
    # URLs
    # Insert a slash before the fragment.
    '/codelists#': '/codelists/#',
    '/schema#': '/schema/#',
    # HTML entities
    # NOTE(review): as written, each key below equals its value, so these four
    # entries change nothing. Presumably the keys were HTML entity references
    # that were decoded when this text was copied -- confirm against the
    # original script.
    '‘': '‘',
    '’': '’',
    '“': '“',
    '”': '”',
    # Drop an &nbsp; that is the only thing between a '>' and a '<'.
    '>&nbsp;<': '><',
    # Classes
    # Strip the "first"/"last" class names, alone or leading a class list.
    # NOTE(review): an attribute that is exactly class="first last" appears to
    # end up as class="last": the ' class="first ' rule strips "first" only
    # after the ' class="last"' rule has already run. Verify whether that case
    # occurs in the input.
    ' class="first"': '',
    ' class="last"': '',
    ' class="first ': ' class="',
    ' class="last ': ' class="',
    # NOTE(review): the ' class="first ' rule above has normally already
    # rewritten any text this could match, so this entry looks unreachable.
    ' class="first last ': ' class="',
    ' class="hide last"': ' class="hide"',
    '<dl class="docutils">': '<dl class="simple">',
    # Links
    ' internal"': ' external"',
    # Unwrap <span class="doc">, keeping only its text.
    r'<span class="doc">([^<]+)</span>': r'\1',
    # Blockquotes
    # Wrap single-line <div> content that does not already start with <p>.
    r'<blockquote>\n<div>(?!<p>)(.+)</div>': r'<blockquote>\n<div><p>\1</p>\n</div>',
    # Lists
    # '^' matches at the start of any line (replace() uses re.MULTILINE), and
    # '.' does not match a newline, so these only touch items whose opening tag
    # and matched content sit on one line.
    r'^<dd>(?!<p>)(.+)</dd>': r'<dd><p>\1</p>\n</dd>',
    r'^<li>(?!<p>)(.+)</li>': r'<li><p>\1</p></li>',
    r'^<li>(?!<p>)(.+)<ul>': r'<li><p>\1</p>\n<ul>',
    # Pull a closing </li> on its own line back up after the paragraph.
    r'^<li>(<p>.+</p>)\n</li>': r'<li>\1</li>',
    # Tables
    r'<table border="1" class="([^"]*)docutils">': r'<table class="\1docutils align-default">',
    '<col width="': '<col style="width: ',
    '<thead valign="bottom">': '<thead>',
    '<tbody valign="top">': '<tbody>',
    # Wrap header/cell content in <p>, dropping one newline before the
    # closing tag if present.
    r'<th class="head"([^>]*)>(?!<p>)(.+?)\n?</th>': r'<th class="head"\1><p>\2</p></th>',
    r'<td>(?!<p>)(.+?)\n?</td>': r'<td><p>\1</p></td>',
    # Only colspan values "1" and "4" are handled.
    r'<td colspan="([14])">(?!<p>)(.+)</td>': r'<td colspan="\1"><p>\2</p></td>',
    # Cleanup (too lazy to fix the above)
    # Collapse runs of repeated <p> / </p> tags into a single tag.
    r'(<p>)+': '<p>',
    r'(</p>)+': '</p>',
    # Whitespace
    # Join a </dt> and the <dd> on the following line.
    '</dt>\n<dd>': '</dt><dd>',
})
# Rewrite the HTML files under 't', the other side of the comparison with
# 'build'. Entries are applied in the order written.
replace('t', {
    # Remove the language_data.js <script> line, at any '../' depth.
    r' <script type="text/javascript" src="(\.\./)*_static/language_data\.js"></script>\n': '',
    # Classes
    ' notranslate': '',
    ' class="first ': ' class="',
    # Links
    # Same two link rules as applied to 'build'.
    ' internal"': ' external"',
    r'<span class="doc">([^<]+)</span>': r'\1',
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment