Skip to content

Instantly share code, notes, and snippets.

@d-rama
Forked from enkore/gist:2978752
Last active January 26, 2021 21:18
Show Gist options
  • Save d-rama/0e5002524ec05da00877bd06e9fe029c to your computer and use it in GitHub Desktop.
Save d-rama/0e5002524ec05da00877bd06e9fe029c to your computer and use it in GitHub Desktop.
HTML to reportlab flowables converter
# I place this in the public domain
# This only handles non-nested lists, emphasis, headings, paragraphs/divs and horizontal rules (which are emitted as a line-break paragraph, not a page break)
# Sufficient for converting Markdown generated HTML to reportlab flowables...
import xml.sax as sax
def html_to_rl(html, styleSheet):
    """Convert a fragment of simple, well-formed HTML into a list of flowables.

    Supported markup: headings ``h1``-``h6``, ``p``/``div`` blocks,
    non-nested ``ol``/``ul`` lists, ``strong``/``b``/``em``/``i`` emphasis
    (which may be nested inside each other) and ``hr``.

    Args:
        html: Markup fragment. It is wrapped in a synthetic ``<doc>`` root
            and parsed as XML, so it must be well-formed (void tags must be
            self-closed, e.g. ``<hr/>``).
        styleSheet: Mapping indexed with ``"BodyText"`` and
            ``"Heading1"`` .. ``"Heading6"`` to obtain paragraph styles.

    Returns:
        A list with one ``Paragraph`` per heading, paragraph, div, list item
        and horizontal rule, in document order.

    Raises:
        xml.sax.SAXParseException: If the wrapped input is not well-formed XML.

    NOTE(review): ``Paragraph`` is not imported in the visible part of this
    file; presumably it is ``reportlab.platypus.Paragraph`` - confirm it is
    in scope wherever this function is used.
    """
    # Local import keeps the function self-contained without touching the
    # module-level import block.
    from xml.sax.saxutils import escape

    headings = ("h1", "h2", "h3", "h4", "h5", "h6")
    # HTML emphasis tag -> inline markup tag written into the paragraph text.
    emphasis_markup = {"strong": "b", "b": "b", "em": "i", "i": "i"}
    # Tags whose end flushes the text buffer into a new flowable.
    block_tags = headings + ("p", "li", "div")

    elements = []

    class Handler(sax.ContentHandler):
        """SAX handler that collects text and emits one flowable per block."""

        def __init__(self):
            sax.ContentHandler.__init__(self)
            # Stack of currently open emphasis markup tags, so closing an
            # inner tag does not cancel the enclosing one.
            self.emphasis = []
            self.buffer = ""
            self.listcounter = 0
            self.listtype = ""

        def startElement(self, name, attrs):
            if name in emphasis_markup:
                self.emphasis.append(emphasis_markup[name])
            elif name == "ol":
                self.listcounter = 1
                self.listtype = "ol"
            elif name == "ul":
                self.listtype = "ul"
            elif name == "hr":
                elements.append(Paragraph('<br />\n', styleSheet["BodyText"]))

        def endElement(self, name):
            if name in emphasis_markup:
                if self.emphasis:
                    self.emphasis.pop()
                return
            if name in ("ol", "ul"):
                self.listcounter = 0
                self.listtype = ""
                return
            if name not in block_tags:
                return

            if name in headings:
                style = styleSheet["Heading%s" % name[-1]]
                elements.append(Paragraph(self.buffer, style))
            elif name == "li":
                if self.listtype == "ul":
                    bullet = u"•"
                else:
                    bullet = "%s." % self.listcounter
                    self.listcounter += 1
                elements.append(
                    Paragraph(self.buffer, styleSheet["BodyText"], bulletText=bullet)
                )
            else:  # "p" or "div"
                elements.append(Paragraph(self.buffer, styleSheet["BodyText"]))

            # The block has been emitted; start collecting text afresh.
            self.buffer = ""

        def characters(self, chars):
            # SAX hands over decoded text ("&amp;" arrives as "&"). The buffer
            # is itself markup, so the text must be re-escaped before it is
            # mixed with the <b>/<i> tags added below.
            text = escape(chars)

            # Wrap with every distinct open emphasis tag, outermost first.
            active = []
            for tag in self.emphasis:
                if tag not in active:
                    active.append(tag)
            for tag in reversed(active):
                text = u"<%s>%s</%s>" % (tag, text, tag)

            self.buffer += text

    # SAX requires a single document element, so the fragment is wrapped in a
    # synthetic <doc> root that the handler ignores.
    sax.parseString(u"<doc>%s</doc>" % html, Handler())
    return elements
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment