Skip to content

Instantly share code, notes, and snippets.

@sontek
Created December 28, 2014 23:53
Check README for pypi
import sys
import StringIO
import urlparse
import cgi
from docutils import io, readers
from docutils.core import publish_doctree, Publisher
from docutils.transforms import TransformError
ALLOWED_SCHEMES = '''file ftp gopher hdl http https imap mailto mms news nntp
prospero rsync rtsp rtspu sftp shttp sip sips snews svn svn+ssh telnet
wais irc'''.split()
def trim_docstring(text):
"""
Trim indentation and blank lines from docstring text & return it.
See PEP 257.
"""
if not text:
return text
# Convert tabs to spaces (following the normal Python rules)
# and split into a list of lines:
lines = text.expandtabs().splitlines()
# Determine minimum indentation (first line doesn't count):
indent = sys.maxint
for line in lines[1:]:
stripped = line.lstrip()
if stripped:
indent = min(indent, len(line) - len(stripped))
# Remove indentation (first line is special):
trimmed = [lines[0].strip()]
if indent < sys.maxint:
for line in lines[1:]:
trimmed.append(line[indent:].rstrip())
# Strip off trailing and leading blank lines:
while trimmed and not trimmed[-1]:
trimmed.pop()
while trimmed and not trimmed[0]:
trimmed.pop(0)
# Return a single string:
return '\n'.join(trimmed)
def processDescription(source, output_encoding='unicode'):
"""Given an source string, returns an HTML fragment as a string.
The return value is the contents of the <body> tag.
Parameters:
- `source`: A multi-line text string; required.
- `output_encoding`: The desired encoding of the output. If a Unicode
string is desired, use the default value of "unicode" .
"""
# Dedent all lines of `source`.
source = trim_docstring(source)
settings_overrides = {
'raw_enabled': 0, # no raw HTML code
'file_insertion_enabled': 0, # no file/URL access
'halt_level': 2, # at warnings or errors, raise an exception
'report_level': 5, # never report problems with the reST code
}
# capture publishing errors, they go to stderr
old_stderr = sys.stderr
sys.stderr = s = StringIO.StringIO()
parts = None
try:
# Convert reStructuredText to HTML using Docutils.
document = publish_doctree(
source=source,
settings_overrides=settings_overrides
)
for node in document.traverse():
if node.tagname == '#text':
continue
if node.hasattr('refuri'):
uri = node['refuri']
elif node.hasattr('uri'):
uri = node['uri']
else:
continue
o = urlparse.urlparse(uri)
if o.scheme not in ALLOWED_SCHEMES:
raise TransformError('link scheme not allowed')
# now turn the transformed document into HTML
reader = readers.doctree.Reader(parser_name='null')
pub = Publisher(
reader,
source=io.DocTreeInput(document),
destination_class=io.StringOutput
)
pub.set_writer('html')
pub.process_programmatic_settings(None, settings_overrides, None)
pub.set_destination(None, None)
pub.publish()
parts = pub.writer.parts
except:
pass
sys.stderr = old_stderr
# original text if publishing errors occur
if parts is None or len(s.getvalue()) > 0:
output = "".join('<PRE>\n' + cgi.escape(source) + '</PRE>')
else:
output = parts['body']
if output_encoding != 'unicode':
output = output.encode(output_encoding)
return output
if __name__ == '__main__':
print(processDescription(sys.stdin.read()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment