Skip to content

Instantly share code, notes, and snippets.

@albertsun
Created August 21, 2011 05:44
Show Gist options
  • Save albertsun/1160201 to your computer and use it in GitHub Desktop.
Save albertsun/1160201 to your computer and use it in GitHub Desktop.
Python port of WordPress's wpautop filter
import re
from django import template
from django.utils.functional import allow_lazy
from django.template.defaultfilters import stringfilter
from django.utils.safestring import mark_safe, SafeData
from django.utils.encoding import force_unicode
from django.utils.html import escape
from django.utils.text import normalize_newlines
register = template.Library()
def linebreaks_wp(pee, autoescape=False):
"""Straight up port of http://codex.wordpress.org/Function_Reference/wpautop"""
if (pee.strip() == ""):
return ""
pee = normalize_newlines(pee)
pee = pee + "\n"
pee = re.sub(r'<br />\s*<br />', "\n\n", pee)
allblocks = r'(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
pee = re.sub(r'(<' + allblocks + '[^>]*>)', lambda m: "\n"+m.group(1) if m.group(1) else "\n", pee)
pee = re.sub(r'(</' + allblocks + '>)', lambda m: m.group(1)+"\n\n" if m.group(1) else "\n\n", pee)
#pee = pee.replace("\r\n", "\n")
#pee = pee.replace("\r", "\n") #these taken care of by normalize_newlines
if (pee.find("<object") != -1):
pee = re.sub(r'\s*<param([^>]*)>\s*', lambda m: "<param%s>" % (m.group(1) if m.group(1) else "", ), pee) # no pee inside object/embed
pee = re.sub(r'\s*</embed>\s*', '</embed>', pee)
pee = re.sub(r"\n\n+", "\n\n", pee) # take care of duplicates
pees = re.split(r'\n\s*\n', pee) # since PHP has a PREG_SPLIT_NO_EMPTY, may need to go through and drop any empty strings
#pees = [p for p in pees if p]
pee = "".join(["<p>%s</p>\n" % tinkle.strip('\n') for tinkle in pees])
pee = re.sub(r'<p>\s*</p>', '', pee) #under certain strange conditions it could create a P of entirely whitespace
pee = re.sub(r'<p>([^<]+)</(div|address|form)>', lambda m: "<p>%s</p></%s>" % ((lambda x: x.group(1) if x.group(1) else "")(m), (lambda x: x.group(2) if x.group(2) else "")(m), ), pee)
pee = re.sub(r'<p>\s*(</?' + allblocks + r'[^>]*>)\s*</p>', lambda m: m.group(1) if m.group(1) else "", pee) # don't pee all over a tag
pee = re.sub(r"<p>(<li.+?)</p>", lambda m: m.group(1) if m.group(1) else "", pee) # problem with nested lists
pee = re.sub(r'<p><blockquote([^>]*)>', lambda m: "<blockquote%s><p>" % (m.group(1) if m.group(1) else "",), pee, flags=re.IGNORECASE)
pee = pee.replace('</blockquote></p>', '</p></blockquote>')
pee = re.sub(r'<p>\s*(</?' + allblocks + r'[^>]*>)', lambda m: m.group(1) if m.group(1) else "", pee)
pee = re.sub(r'(</?' + allblocks + '[^>]*>)\s*</p>', lambda m: m.group(1) if m.group(1) else "", pee)
def _autop_newline_preservation_helper(matches):
return matches.group(0).replace("\n", "<WPPreserveNewline />")
pee = re.sub(r'<(script|style).*?</\1>', _autop_newline_preservation_helper, pee, flags=re.DOTALL)
pee = re.sub(r'(?<!<br />)\s*\n', "<br />\n", pee) # make line breaks
pee = pee.replace('<WPPreserveNewline />', "\n")
pee = re.sub(r'(</?' + allblocks + '[^>]*>)\s*<br />', lambda m: m.group(1) if m.group(1) else "", pee)
pee = re.sub(r'<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)', lambda m: m.group(1) if m.group(1) else "", pee)
if (pee.find('<pre') != -1):
def clean_pre(m):
if m.group(1) and m.group(2):
text = m.group(2)
text = text.replace('<br />', '')
text = text.replace('<p>', "\n")
text = text.replace('</p>', '')
text = m.group(1)+escape(text)+"</pre>"
else:
text = m.group(0)
text = text.replace('<br />', '')
text = text.replace('<p>', "\n")
text = text.replace('</p>', '')
return text
pee = re.sub('(?is)(<pre[^>]*>)(.*?)</pre>', clean_pre, pee)
pee = re.sub( r"\n</p>$", '</p>', pee)
return pee
linebreaks_wp = allow_lazy(linebreaks_wp, unicode)
@register.filter("linebreaks_wp")
@stringfilter
def linebreaks_wp_filter(value, autoescape=None):
"""Straight up port of http://codex.wordpress.org/Function_Reference/wpautop"""
autoescape = autoescape and not isinstance(value, SafeData)
return mark_safe(linebreaks_wp(value, autoescape))
linebreaks_wp_filter.is_safe = True
linebreaks_wp_filter.needs_autoescape = True
linebreaks_wp = stringfilter(linebreaks_wp)
@sebbcn
Copy link

sebbcn commented Jul 23, 2014

This gist saved my day. Thanks!

@howmp
Copy link

howmp commented Mar 29, 2017

ths for share

@ehsantg
Copy link

ehsantg commented Oct 16, 2017

thank you

@frankyhung93
Copy link

This also saved my daY!! You are a hero in doing this!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment