Skip to content

Instantly share code, notes, and snippets.

@ghawkgu
Forked from remino/wpautop for ruby
Created January 4, 2013 07:25
Show Gist options
  • Save ghawkgu/4450636 to your computer and use it in GitHub Desktop.
Save ghawkgu/4450636 to your computer and use it in GitHub Desktop.
# Wraps blocks of text separated by blank lines in <p>...</p> elements and,
# optionally, turns the remaining single newlines into "<br />" line breaks.
# Block-level HTML elements (tables, lists, divs, headings, pre, ...) are kept
# out of the generated paragraphs.
#
# NOTE(review): name and structure suggest this is a Ruby port of WordPress's
# `wpautop`; confirm against the upstream PHP when changing behavior.
#
# @param pee [String] the text to format
# @param br [Boolean] when true, single newlines left inside the output become
#   "<br />\n", except inside <script>/<style> elements
# @return [String] the formatted HTML, or '' when the input is blank
def wpautop(pee, br = true)
  return '' if pee.strip.empty?

  allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'
  # Matches any opening or closing block-level tag and captures it as group 1.
  any_block_tag = '(<\/?' + allblocks + '[^>]*>)'

  pee = "#{pee}\n" # pad the end so the last chunk is handled like the others
  pee = pee.gsub(/<br \/>\s*<br \/>/, "\n\n")

  # Space things out: one newline before an opening block tag, a blank line
  # after a closing one, so block elements end up in chunks of their own.
  pee = pee.gsub(Regexp.new('(<' + allblocks + '[^>]*>)'), "\n\\1")
  pee = pee.gsub(Regexp.new('(<\/' + allblocks + '[^>]*>)'), "\\1\n\n")
  pee = pee.gsub(/\r\n|\r/, "\n") # cross-platform newlines

  if pee.include?('<object')
    # No paragraphs inside object/embed markup.
    pee = pee.gsub(/\s*<param([^>]*)>\s*/, '<param\1>')
    pee = pee.gsub(/\s*<\/embed>\s*/, '</embed>')
  end

  pee = pee.gsub(/\n\n+/, "\n\n") # collapse runs of blank lines

  # Make paragraphs, including one at the end. Only the newlines at the edges
  # of a chunk are trimmed; inner newlines must survive so the `br` step below
  # can turn them into line breaks.
  chunks = pee.split(/\n\s*\n/).reject(&:empty?)
  pee = chunks.map { |tinkle| "<p>#{tinkle.gsub(/\A\n+|\n+\z/, '')}</p>\n" }.join

  pee = pee.gsub(/<p>\s*<\/p>/, '') # drop paragraphs made only of whitespace
  pee = pee.gsub(/<p>([^<]+)<\/(div|address|form)>/, '<p>\1</p></\2>')
  pee = pee.gsub(Regexp.new('<p>\s*' + any_block_tag + '\s*<\/p>'), '\1') # a lone block tag needs no paragraph
  # Single-quoted so that \1 is a backreference (a double-quoted "\1" is the
  # byte 0x01 and would wipe out the list item).
  pee = pee.gsub(/<p>(<li.+?)<\/p>/, '\1') # problem with nested lists
  pee = pee.gsub(/<p><blockquote([^>]*)>/i, '<blockquote\1><p>')
  pee = pee.gsub('</blockquote></p>', '</p></blockquote>')
  pee = pee.gsub(Regexp.new('<p>\s*' + any_block_tag), '\1')
  pee = pee.gsub(Regexp.new(any_block_tag + '\s*</p>'), '\1')

  if br
    # Hide newlines inside <script>/<style> behind a placeholder so they are
    # not converted; /m lets "." span lines, \1 matches the same tag name.
    pee = pee.gsub(/<(script|style).*?<\/\1>/m) do |element|
      element.gsub("\n", '<WPPreserveNewline />')
    end
    # Turn remaining newlines into line breaks unless one is already there.
    pee = pee.gsub(/(?<!<br \/>)\s*\n/, "<br />\n")
    pee = pee.gsub('<WPPreserveNewline />', "\n")
  end

  # Line breaks directly after a block tag, or directly before common block
  # tags, are redundant.
  pee = pee.gsub(Regexp.new(any_block_tag + '\s*<br />'), '\1')
  pee = pee.gsub(/<br \/>(\s*<\/?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)/, '\1')

  if pee.include?('<pre')
    # Undo paragraph/line-break markup that was inserted inside <pre> elements.
    pee = pee.gsub(/(<pre[^>]*>)(.*?)<\/pre>/im) do |pre_element|
      pre_element.gsub('<br />', '').gsub('<p>', "\n").gsub('</p>', '')
    end
  end

  pee.gsub(/\n<\/p>$/, '</p>')
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment