Created
March 13, 2013 14:28
Revisions
-
Waylan Limberg created this gist
Mar 13, 2013 .There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters. Original file line number Diff line number Diff line change @@ -0,0 +1,75 @@ # HTML Tidy Extension for Python-Markdown # ======================================= # # Runs [HTML Tidy][] on the output of Python-Markdown using the [uTidylib][] # Python wrapper. Both libtidy and uTidylib must be installed on your system. # # [HTML Tidy]: http://tidy.sourceforge.net/ # [uTidylib]: http://utidylib.berlios.de/ # # Note that any Tidy [options][] can be passed in as [extension configs][]. So, # for example, to output HTML rather than XHTML, set ``output_xhtml=0``. To # indent the output, set ``indent=auto`` and to have Tidy wrap the output in # ``<html>`` and ``<body>`` tags, set ``show_body_only=0``. See Tidy's # [options][] for a full list of the available options. The defaults are set to # most closely match Markdown's defaults with the exception that you get much # better pretty-printing. # # [options]: http://tidy.sourceforge.net/docs/quickref.html # [extension configs]: ../reference.html#extension_configs # # Note that options set in this extension will override almost any other settings # passed on to Markdown (such as "output_format"). Unlike Markdown, this extension # will also treat raw HTML no differently than that output by Markdown. In other # words, it may munge a document author's carefully crafted HTML. Of course, it # may also transform poorly formed raw HTML into nice, valid HTML. Take these # things into consideration when electing to use this extension. # # Copyright (c)2008 [Waylan Limberg](http://achinghead.com) # # License: [BSD](http://www.opensource.org/licenses/bsd-license.php) from __future__ import absolute_import from . 
import Extension
from ..postprocessors import Postprocessor
from ..util import text_type

try:
    import tidy
except ImportError:
    # uTidylib is optional: without it the extension still loads, but
    # ``extendMarkdown`` does not register the postprocessor.
    tidy = None


class TidyExtension(Extension):
    """Markdown extension that runs the rendered output through HTML Tidy."""

    def __init__(self, configs):
        """Build the Tidy option dict from defaults plus user overrides.

        ``configs`` may be ``None``, an iterable of ``(key, value)`` pairs,
        or a dict of Tidy options. Every entry overrides the default of the
        same name or adds a new option.
        """
        # Set defaults to match typical markdown behavior.
        self.config = dict(
            output_xhtml=1,
            show_body_only=1,
            char_encoding='utf8',
        )
        # Merge in user defined configs, overriding any present if necessary.
        # ``makeExtension`` passes ``None`` when called without configs, so a
        # missing/empty value means "no overrides" instead of being iterated.
        if configs:
            pairs = configs.items() if isinstance(configs, dict) else configs
            for item in pairs:
                self.config[item[0]] = item[1]

    def extendMarkdown(self, md, md_globals):
        """Attach the Tidy options and postprocessor to ``md``.

        The options are always stored on ``md.tidy_options``. The
        postprocessor is registered under the key ``'tidy'`` only when the
        ``tidy`` module could be imported. ``md_globals`` is not used.
        """
        # Save options to markdown instance
        md.tidy_options = self.config
        # Add TidyProcessor to postprocessors
        if tidy:
            md.postprocessors['tidy'] = TidyProcessor(md)


class TidyProcessor(Postprocessor):
    """Postprocessor that passes the serialized document to Tidy."""

    def run(self, text):
        """Return ``text`` after it has been processed by Tidy.

        The encoding is taken from the ``char_encoding`` entry of
        ``self.markdown.tidy_options`` (``'utf8'`` if absent). The same name
        is used for Python's encode/decode and is forwarded to Tidy along
        with every other option.
        """
        # Pass text to Tidy. As Tidy does not accept unicode we need to encode
        # it and decode its return value.
        options = self.markdown.tidy_options
        enc = options.get('char_encoding', 'utf8')
        tidied = tidy.parseString(text.encode(enc), **options)
        return text_type(tidied, encoding=enc)


def makeExtension(configs=None):
    """Return a ``TidyExtension`` built from ``configs``."""
    return TidyExtension(configs=configs)