Created
March 13, 2013 14:28
-
-
Save waylan/5152650 to your computer and use it in GitHub Desktop.
Archive of the Python-Markdown HTML Tidy Extension.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HTML Tidy Extension for Python-Markdown | |
# ======================================= | |
# | |
# Runs [HTML Tidy][] on the output of Python-Markdown using the [uTidylib][] | |
# Python wrapper. Both libtidy and uTidylib must be installed on your system. | |
# | |
# [HTML Tidy]: http://tidy.sourceforge.net/ | |
# [uTidylib]: http://utidylib.berlios.de/ | |
# | |
# Note that any Tidy [options][] can be passed in as [extension configs][]. So,
# for example, to output HTML rather than XHTML, set ``output_xhtml=0``. To
# indent the output, set ``indent=auto`` and to have Tidy wrap the output in
# ``<html>`` and ``<body>`` tags, set ``show_body_only=0``. See Tidy's
# [options][] for a full list of the available options. The defaults are set to
# most closely match Markdown's defaults with the exception that you get much
# better pretty-printing.
# | |
# [options]: http://tidy.sourceforge.net/docs/quickref.html | |
# [extension configs]: ../reference.html#extension_configs | |
# | |
# Note that options set in this extension will override most any other settings
# passed on to Markdown (such as "output_format"). Unlike Markdown, this extension
# will also treat raw HTML no differently than that output by Markdown. In other
# words, it may munge a document author's carefully crafted HTML. Of course, it
# may also transform poorly formed raw HTML into nice, valid HTML. Take these
# things into consideration when electing to use this extension.
# | |
# Copyright (c)2008 [Waylan Limberg](http://achinghead.com) | |
# | |
# License: [BSD](http://www.opensource.org/licenses/bsd-license.php) | |
from __future__ import absolute_import | |
from . import Extension | |
from ..postprocessors import Postprocessor | |
from ..util import text_type | |
try: | |
import tidy | |
except ImportError: | |
tidy = None | |
class TidyExtension(Extension):
    """Markdown extension that runs the rendered output through HTML Tidy.

    The merged option dict is stored on the Markdown instance as
    ``tidy_options``; ``TidyProcessor`` passes it to Tidy as keyword
    arguments.
    """

    def __init__(self, configs=None):
        """Build the Tidy option dict from defaults plus user overrides.

        ``configs`` may be ``None`` or empty (defaults only), a mapping of
        option name to value, or an iterable of ``(name, value)`` pairs.
        """
        # Set defaults to match typical markdown behavior.
        self.config = dict(
            output_xhtml=1,
            show_body_only=1,
            char_encoding='utf8',
        )
        # ``makeExtension()`` forwards ``configs=None`` when called without
        # arguments, so a missing value must not be iterated.
        if configs:
            # Accept a mapping as well as a sequence of pairs.
            if hasattr(configs, 'items'):
                pairs = configs.items()
            else:
                pairs = configs
            # Merge in user defined configs, overriding any present if
            # necessary.
            for c in pairs:
                self.config[c[0]] = c[1]

    def extendMarkdown(self, md, md_globals):
        """Attach the Tidy options and postprocessor to ``md``.

        ``md_globals`` is accepted but not used here.
        """
        # Save options to markdown instance
        md.tidy_options = self.config
        # Add TidyProcessor to postprocessors. When the ``tidy`` module
        # could not be imported the extension silently does nothing.
        if tidy:
            md.postprocessors['tidy'] = TidyProcessor(md)
class TidyProcessor(Postprocessor):
    """Postprocessor that hands the serialized document to HTML Tidy."""

    def run(self, text):
        """Return ``text`` after it has been processed by Tidy.

        Tidy does not accept unicode, so the text is encoded before the
        call and Tidy's result is decoded again using the same encoding.
        """
        options = self.markdown.tidy_options
        # The same option names both Tidy's encoding and the codec used here.
        enc = options.get('char_encoding', 'utf8')
        encoded = text.encode(enc)
        tidied = tidy.parseString(encoded, **options)
        return text_type(tidied, encoding=enc)
def makeExtension(configs=None):
    """Create and return a ``TidyExtension`` configured with ``configs``."""
    extension = TidyExtension(configs=configs)
    return extension
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment