Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generate HTML in Python
import tornado.ioloop
import tornado.web
#import htmlboilerplate as html
from bs4 import BeautifulSoup, Tag
import htmlboilerplate2 as html
class MainHandler(tornado.web.RequestHandler):
    """Request handler that answers GET with a small demo page.

    The page is assembled as a BeautifulSoup tree: a title in the head and
    one table (built by ``htmlboilerplate2``) in the body.
    """

    def get(self):
        """Build the example document and write it to the response."""
        # The code below relies on the parsed (empty) document exposing
        # ``head`` and ``body`` elements.
        document = BeautifulSoup('', 'html5lib')

        document.head.append(Tag(name='title'))
        document.head.title.append('This is an example title')

        # Second table row: every cell is a coloured <span>; two of them wrap
        # a link, one wraps text full of characters that need escaping.
        styled_row = [
            html.bgcolor('red', html.link(self.application.reverse_url('foo'), 'link to foo')),
            html.bgcolor('blue', '<"oh" & snap!\'>'),
            html.bgcolor('black', html.link('http://example.com', 'foo & bar')),
        ]
        example_table = html.buildTable(
            header=['col1header', 'col2header', 'col3header'],
            rows=[
                ['row1col1', 'row1col2', 'row1col3'],
                styled_row,
            ],
        )
        document.body.append(example_table)

        self.set_status(200)
        # The doctype is written separately, ahead of the serialized tree.
        self.write(html.doctype())
        self.write(str(document))
        self.finish()
def make_app():
    """Return the tornado Application for the demo.

    Two routes are registered: a static-file route for ``/favicon.ico``
    whose path is ``/dev/null``, and a catch-all route served by
    ``MainHandler`` and named ``'foo'`` (the name ``MainHandler.get`` passes
    to ``reverse_url``).
    """
    routes = [
        (r"/(favicon.ico)", tornado.web.StaticFileHandler, {'path': '/dev/null'}),
        (r"/.*", MainHandler, {}, 'foo'),
    ]
    return tornado.web.Application(routes)
if __name__ == "__main__":
    # Script entry point: build the app, listen on port 8888 and run the
    # tornado IO loop until the process is stopped.
    app = make_app()
    app.listen(8888)
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()
import tornado.escape
def escape(content):
    """Return *content* as an HTML-safe ``str``.

    An ``EscapedContent`` instance (or an instance of a subclass) is assumed
    to hold markup that is already safe and is returned via ``str()``
    unchanged. Anything else is converted with ``str()`` and passed through
    ``tornado.escape.xhtml_escape``.

    >>> escape('foo & bar')
    'foo &amp; bar'
    >>> escape(EscapedContent('foo & bar'))
    'foo &amp; bar'
    >>> escape(EscapedContent(escaped='foo & bar'))
    'foo & bar'
    """
    # isinstance (not an exact type comparison) so that subclasses of
    # EscapedContent are not escaped a second time.
    if isinstance(content, EscapedContent):
        return str(content)
    return str(tornado.escape.xhtml_escape(str(content)))
class EscapedContent(object):
    """Wrapper marking a piece of markup as already HTML-escaped.

    Construct it either from text that still has to be escaped
    (``unescaped``, also the first positional argument) or from markup that
    is already safe (``escaped``). ``str()`` of the instance yields the safe
    markup.

    >>> s = 'foo & bar'
    >>> str(EscapedContent(s))
    'foo &amp; bar'
    >>> str(EscapedContent(unescaped=s))
    'foo &amp; bar'
    >>> str(EscapedContent(escaped=s))
    'foo & bar'
    >>> str(EscapedContent())
    ''
    >>> str(EscapedContent(unescaped=s, escaped=s))
    Traceback (most recent call last):
    ...
    ValueError: Both unescaped and escaped was provided to EscapedContent. Use one or the other.
    """

    def __init__(self, unescaped = None, escaped = None, *args, **kwargs):
        """Store the safe markup.

        Args:
            unescaped: text to escape with ``escape()`` before storing.
            escaped: markup stored as given; another ``EscapedContent`` is
                unwrapped to its content.
            *args, **kwargs: accepted and ignored.

        Raises:
            ValueError: if both ``unescaped`` and ``escaped`` are given.
        """
        if unescaped is not None and escaped is not None:
            raise ValueError('Both unescaped and escaped was provided to EscapedContent. Use one or the other.')
        # Always define ``content`` so that str() works even when neither
        # argument was supplied (previously that raised AttributeError).
        self.content = ''
        if unescaped is not None:
            self.content = escape(unescaped)
        elif isinstance(escaped, EscapedContent):
            self.content = escaped.content
        elif escaped is not None:
            self.content = escaped

    def __str__(self):
        """Return the stored, already-escaped markup."""
        return self.content
def doctype():
    """Return the XHTML 1.0 Transitional doctype plus the opening ``<html>`` tag.

    The result is wrapped in ``EscapedContent`` so it is emitted verbatim.
    Note that the ``<html>`` element is opened here and not closed.
    """
    pieces = [
        '<!DOCTYPE',
        ' html',
        ' PUBLIC',
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"',
        ' "http://www.w3.org/TR/xhtml/DTD/xhtml1-transitional.dtd"',
        '>',
        '<html',
        ' xmlns="http://www.w3.org/1999/xhtml"',
        '>',
    ]
    return EscapedContent(escaped=''.join(pieces))
def headtitle(title):
    """Return a ``<head>`` element containing only a ``<title>``.

    The title is passed through ``escape()`` so plain text cannot inject
    markup; pass an ``EscapedContent`` to supply pre-escaped markup.

    >>> str(headtitle('a & b'))
    '<head><title>a &amp; b</title></head>'
    >>> str(headtitle(EscapedContent(escaped='a &amp; b')))
    '<head><title>a &amp; b</title></head>'
    """
    return EscapedContent(
        escaped='<head><title>{}</title></head>'.format(escape(title))
    )
def link(url, text = None):
    """Return an ``<a href=...>`` element as ``EscapedContent``.

    Both the URL and the link text go through ``escape()``: plain strings
    are HTML-escaped, ``EscapedContent`` values are used verbatim. Escaping
    the URL keeps characters such as ``&`` or quotes from breaking out of
    the single-quoted ``href`` attribute. When ``text`` is falsy (``None``
    or ``''``) the URL is used as the link text.

    >>> str(link('foo & bar'))
    "<a href='foo &amp; bar'>foo &amp; bar</a>"
    >>> str(link('foo & bar', 'foo & bar'))
    "<a href='foo &amp; bar'>foo &amp; bar</a>"
    >>> str(link('foo & bar', EscapedContent(escaped='foo & bar')))
    "<a href='foo &amp; bar'>foo & bar</a>"
    >>> str(link(EscapedContent(escaped='foo & bar')))
    "<a href='foo & bar'>foo & bar</a>"
    """
    href = str(escape(url))
    label = str(escape(text)) if text else href
    return EscapedContent(escaped='<a href=\'{}\'>{}</a>'.format(href, label))
def anchor(name, text = None):
    """Return an ``<a id=...>`` anchor element as ``EscapedContent``.

    Both the anchor name and the text go through ``escape()``: plain strings
    are HTML-escaped, ``EscapedContent`` values are used verbatim. Escaping
    the name keeps characters such as ``&`` or quotes from breaking out of
    the single-quoted ``id`` attribute. When ``text`` is falsy (``None`` or
    ``''``) the name is used as the anchor text.

    >>> str(anchor('foo & bar'))
    "<a id='foo &amp; bar'>foo &amp; bar</a>"
    >>> str(anchor('foo & bar', 'foo & bar'))
    "<a id='foo &amp; bar'>foo &amp; bar</a>"
    >>> str(anchor('foo & bar', EscapedContent(escaped='foo & bar')))
    "<a id='foo &amp; bar'>foo & bar</a>"
    >>> str(anchor(EscapedContent(escaped='foo & bar')))
    "<a id='foo & bar'>foo & bar</a>"
    """
    anchor_id = str(escape(name))
    label = str(escape(text)) if text else anchor_id
    return EscapedContent(escaped='<a id=\'{}\'>{}</a>'.format(anchor_id, label))
def table_head_or_row(row, sep):
    """Render one table row as ``<tr>...</tr>`` wrapped in ``EscapedContent``.

    ``sep`` is the cell tag name: ``'th'`` for a header row, ``'td'`` for a
    regular row. Every entry of ``row`` is passed through ``escape()``, so
    plain strings are escaped and ``EscapedContent`` entries are kept as-is.
    ``sep`` itself is inserted without escaping.
    """
    cells = []
    for column in row:
        cells.append('<{}>{}</{}>'.format(sep, str(escape(column)), sep))
    row_markup = '<tr>{}</tr>'.format(''.join(cells))
    return EscapedContent(escaped=row_markup)
def tablehead(row):
    """Render ``row`` as a header row, i.e. a ``<tr>`` of ``<th>`` cells.

    >>> unescapedrow = ['foo', 'bar', 'foo & bar']
    >>> str(tablehead(unescapedrow))
    '<tr><th>foo</th><th>bar</th><th>foo &amp; bar</th></tr>'
    >>> escapedrow = ['foo', 'bar', EscapedContent(escaped='foo & bar')]
    >>> str(tablehead(escapedrow))
    '<tr><th>foo</th><th>bar</th><th>foo & bar</th></tr>'
    """
    header_row = table_head_or_row(row, 'th')
    return header_row
def tablerow(row):
    """Render ``row`` as a regular row, i.e. a ``<tr>`` of ``<td>`` cells.

    >>> unescapedrow = ['foo', 'bar', 'foo & bar']
    >>> str(tablerow(unescapedrow))
    '<tr><td>foo</td><td>bar</td><td>foo &amp; bar</td></tr>'
    >>> escapedrow = ['foo', 'bar', EscapedContent(escaped='foo & bar')]
    >>> str(tablerow(escapedrow))
    '<tr><td>foo</td><td>bar</td><td>foo & bar</td></tr>'
    """
    data_row = table_head_or_row(row, 'td')
    return data_row
def table(header, rows):
    """Render a complete ``<table>`` with ``<thead>`` and ``<tbody>``.

    ``header`` is a list with one entry per column. ``rows`` is a list of
    lists: the outer list is per row, the inner list per column, e.g.
    ``[[r1c1, r1c2], [r2c1, r2c2], ...]``.

    >>> uh = ['foo', 'bar', 'foo & bar']; ur = [['for', 'brr', 'for & brr']]
    >>> eh = ['FOO', 'BAR', EscapedContent(escaped='FOO & BAR')]; er = [['FOR', 'BRR', EscapedContent(escaped='FOR & BRR')]]
    >>> str(table(uh, ur))
    '<table><thead><tr><th>foo</th><th>bar</th><th>foo &amp; bar</th></tr></thead><tbody><tr><td>for</td><td>brr</td><td>for &amp; brr</td></tr></tbody></table>'
    >>> str(table(eh, er))
    '<table><thead><tr><th>FOO</th><th>BAR</th><th>FOO & BAR</th></tr></thead><tbody><tr><td>FOR</td><td>BRR</td><td>FOR & BRR</td></tr></tbody></table>'
    """
    head_markup = str(tablehead(header))
    body_markup = ''.join(str(tablerow(row)) for row in rows)
    markup = '<table><thead>{}</thead><tbody>{}</tbody></table>'.format(
        head_markup, body_markup
    )
    return EscapedContent(escaped=markup)
def bgcolor(color, content):
    """Wrap ``content`` in a ``<span>`` with the given background colour.

    ``color`` may be a CSS keyword such as 'inherit', 'transparent' or
    'initial', or a colour value; ``None`` means 'initial'.
    https://www.w3schools.com/cssref/pr_background-color.asp

    Both ``color`` and ``content`` go through ``escape()``, so a plain
    string cannot break out of the ``style`` attribute or inject markup,
    while ``EscapedContent`` values are inserted verbatim.

    >>> str(bgcolor('red', 'foo & bar'))
    '<span style="background-color:red">foo &amp; bar</span>'
    >>> str(bgcolor(None, EscapedContent(escaped='<b>x</b>')))
    '<span style="background-color:initial"><b>x</b></span>'
    """
    css_color = 'initial' if color is None else escape(color)
    return EscapedContent(escaped=(
        '<span style="background-color:{}">{}</span>'
        .format(css_color, escape(content))
    ))
if __name__ == '__main__':
    # Running the module directly executes the doctests in its docstrings.
    from doctest import testmod
    testmod()
#!/usr/bin/env python3
from bs4 import BeautifulSoup, Tag
def doctype():
    """Return the HTML5 doctype declaration as a plain string.

    The doctype is not actually a tag and not part of the HTML itself; it
    must come before ``<html>`` and is parsed by browsers to determine the
    HTML version (HTML4, XHTML, HTML5, ... each have a different blurb).
    See https://www.w3schools.com/tags/tag_doctype.asp
    """
    # There is also a Doctype class in BeautifulSoup. I have no idea how to
    # use it. If you do know, please fix.
    #
    # htmlboilerplate.py returned XHTML instead. BeautifulSoup makes HTML5
    # easier, so that form is deprecated; it is kept here for reference:
    #   <!DOCTYPE html PUBLIC
    #     "-//W3C//DTD XHTML 1.0 Transitional//EN"
    #     "http://www.w3.org/TR/xhtml/DTD/xhtml1-transitional.dtd">
    #   <html xmlns="http://www.w3.org/1999/xhtml">
    html5_doctype = "<!DOCTYPE html>"
    return html5_doctype
def returnOrEmbed(obj, desiredTag, newAttrs = None):
    """Return ``obj`` as a ``desiredTag`` element, wrapping it if necessary.

    Args:
        obj: ``None``, a Tag, a string, or a list/tuple of such items.
        desiredTag: tag name to return; the comparison with ``obj.name`` is
            case sensitive.
        newAttrs: attributes for a newly created wrapper. Ignored when
            ``obj`` already is a ``desiredTag`` Tag and is returned as-is.

    Returns:
        ``obj`` itself when it is already a Tag named ``desiredTag``;
        otherwise a new ``desiredTag`` Tag that is empty (``obj`` is None),
        contains every item of ``obj`` (list or tuple), or contains ``obj``.
    """
    # Note case sensitivity!
    if isinstance(obj, Tag) and obj.name == desiredTag:
        return obj
    wrapper = Tag(name=desiredTag, attrs=newAttrs)
    if obj is None:
        return wrapper
    # isinstance so that tuples and list subclasses are expanded into
    # children too, instead of being appended as a single object.
    children = obj if isinstance(obj, (list, tuple)) else [obj]
    for child in children:
        wrapper.append(child)
    return wrapper
def tableHeadElements(elements):
"""
>>> c3 = Tag(name='th'); c3.append('<test>')
>>> c4 = Tag(name='div'); c4.append('m&m')
>>> tableHeadElements(['foobar', 'foo & bar', c3, c4])
[<th>foobar</th>, <th>foo &amp; bar</th>, <th>&lt;test&gt;</th>, <th><div>m&amp;m</div></th>]
"""
# headers is a list of columns or rows to have the <thead> wrapped.
build = lambda cell: returnOrEmbed(cell, 'th')
tags = [
build(cell)
for cell in elements
]
return tags
def tableDataElements(content):
    """Return a list with every entry of ``content`` as a ``<td>`` Tag.

    Entries that already are ``<td>`` Tags are kept; anything else is
    wrapped in a new ``<td>`` by ``returnOrEmbed``.

    >>> c3 = Tag(name='td'); c3.append('<oh man>')
    >>> c4 = Tag(name='div'); c4.append('m&m')
    >>> tableDataElements(['foobar', 'foo & bar', c3, c4])
    [<td>foobar</td>, <td>foo &amp; bar</td>, <td>&lt;oh man&gt;</td>, <td><div>m&amp;m</div></td>]
    """
    return [returnOrEmbed(cell, 'td') for cell in content]
def tableRow(elements):
    """Return ``elements`` as a ``<tr>`` Tag, wrapping them via ``returnOrEmbed``."""
    row_tag = returnOrEmbed(elements, 'tr')
    return row_tag
def buildTable(header = None, rows = None, caption = None, tableId = None):
    """Build a ``<table>`` Tag from optional caption, header and rows.

    ``tableId`` becomes the ``id`` attribute, ``caption`` a ``<caption>``
    child, ``header`` one ``<tr>`` of ``<th>`` cells and every entry of
    ``rows`` one ``<tr>`` of ``<td>`` cells. Arguments that are ``None`` (or,
    for ``header`` and ``rows``, empty) add nothing to the table.

    >>> buildTable()
    <table></table>
    >>> buildTable(header=[], rows=[])
    <table></table>
    >>> buildTable(caption='l&ol', tableId='iamanid')
    <table id="iamanid"><caption>l&amp;ol</caption></table>
    >>> c=Tag(name='div'); c.append('l&ol'); buildTable(caption=c, tableId='iamanid')
    <table id="iamanid"><caption><div>l&amp;ol</div></caption></table>
    >>> hc3 = Tag(name='th'); hc3.append('<hc3>') # hc=header cell
    >>> hc4 = Tag(name='td'); hc4.append('<hc4>')
    >>> h = ['hc1', 'hc2', hc3, hc4]; hc3['bute'] = 'attr'
    >>> r1c3 = Tag(name='th'); r1c3.append('<r1c3>') # rxcx = row x column x
    >>> r1c4 = Tag(name='td'); r1c4.append('<r1c4>'); r1c4['attr'] = 'bute'
    >>> r2c4 = Tag(name='div'); r2c4.append('<r2c4>'); r2c4['class'] = 'foo'
    >>> rs = [['r1c1', 'r1c2', r1c3, r1c4], ['r2c1', 'r2c2', 'r2c3', r2c4]]
    >>> t = buildTable(header=h, rows=rs); print('\\n'.join(str(c) for c in t.children))
    <tr><th>hc1</th><th>hc2</th><th bute="attr">&lt;hc3&gt;</th><th><td>&lt;hc4&gt;</td></th></tr>
    <tr><td>r1c1</td><td>r1c2</td><td><th>&lt;r1c3&gt;</th></td><td attr="bute">&lt;r1c4&gt;</td></tr>
    <tr><td>r2c1</td><td>r2c2</td><td>r2c3</td><td><div class="foo">&lt;r2c4&gt;</div></td></tr>
    """
    result = Tag(name='table')

    if tableId is not None:
        result['id'] = str(tableId)

    if caption is not None:
        caption_tag = returnOrEmbed(caption, 'caption')
        result.append(caption_tag)

    # The header row is a plain <tr> of <th> cells appended to the table.
    if header is not None and len(header):
        result.append(tableRow(tableHeadElements(header)))

    if rows is not None and len(rows):
        for cells in rows:
            result.append(tableRow(tableDataElements(cells)))

    return result
def link(url, content = None):
    """Return an ``<a>`` Tag whose ``href`` is ``url``.

    ``url`` may be a string or a Tag carrying an ``href`` attribute, in
    which case that attribute value is used. ``content`` becomes the child
    of the link; when it is ``None`` the URL itself is used as the text.

    >>> link('http://foo/bar?lol&what')
    <a href="http://foo/bar?lol&amp;what">http://foo/bar?lol&amp;what</a>
    >>> link('foo & bar')
    <a href="foo &amp; bar">foo &amp; bar</a>
    >>> link('foo & bar', content='')
    <a href="foo &amp; bar"></a>
    >>> text=Tag(name='span'); text.append('l&ol'); link('foo & bar', text)
    <a href="foo &amp; bar"><span>l&amp;ol</span></a>
    >>> text=Tag(name='span'); text.append('l&ol'); link(text)
    Traceback (most recent call last):
    ...
    ValueError: url should not be a Tag, or should contain an 'href' attribute
    >>> l=link('foo&bar'); link(l, 'lolwhat')
    <a href="foo&amp;bar">lolwhat</a>
    """
    if type(url) is Tag:
        # Reuse the target of an existing link-like Tag.
        if 'href' not in url.attrs:
            raise ValueError(
                'url should not be a Tag, '
                'or should contain an \'href\' attribute'
            )
        url = url['href']

    link_tag = Tag(name='a')
    link_tag['href'] = url
    child = url if content is None else content
    link_tag.append(child)
    return link_tag
def anchor(name, content = None):
    """Return an ``<a>`` Tag whose ``id`` is ``name``.

    ``name`` may be a string or a Tag carrying an ``id`` attribute, in which
    case that attribute value is used. ``content`` becomes the child of the
    anchor; when it is ``None`` the name itself is used as the text.

    >>> anchor('bar&foo')
    <a id="bar&amp;foo">bar&amp;foo</a>
    >>> anchor('bar&foo', 'l&ol')
    <a id="bar&amp;foo">l&amp;ol</a>
    >>> text=Tag(name='span'); text.append('l&ol'); anchor('bar&foo', text)
    <a id="bar&amp;foo"><span>l&amp;ol</span></a>
    >>> text=Tag(name='span'); text.append('l&ol'); anchor(text)
    Traceback (most recent call last):
    ...
    ValueError: anchor name should not be a Tag, or should contain an 'id' attribute
    >>> a=anchor('iam&ananchor'); anchor(a, 'i am text')
    <a id="iam&amp;ananchor">i am text</a>
    """
    if type(name) is Tag:
        # Reuse the id of an existing Tag.
        if 'id' not in name.attrs:
            raise ValueError(
                'anchor name should not be a Tag, '
                'or should contain an \'id\' attribute'
            )
        name = name['id']

    anchor_tag = Tag(name='a')
    anchor_tag['id'] = name
    child = name if content is None else content
    anchor_tag.append(child)
    return anchor_tag
def bgcolor(color, content = None):
    """Return a ``<span>`` Tag with the given background colour.

    Args:
        color: a CSS colour string, or a Tag whose ``style`` attribute has a
            ``background-color`` declaration to copy. ``None`` and ``''``
            mean ``'initial'``.
        content: optional child appended to the span.

    Raises:
        ValueError: if ``color`` is a Tag without a ``style`` attribute or
            without a ``background-color`` declaration in it.

    >>> bgcolor('red')
    <span style="background-color:red"></span>
    >>> bgcolor('red', 'i am text')
    <span style="background-color:red">i am text</span>
    >>> bgcolor('', 'i am text')
    <span style="background-color:initial">i am text</span>
    >>> span=Tag(name='span');span['style']='color:red;background-color:blue';bgcolor(span, 'i am text')
    <span style="background-color:blue">i am text</span>
    >>> span=Tag(name='span');span['style']='color: red; Background-Color: blue';bgcolor(span)
    <span style="background-color:blue"></span>
    """
    if isinstance(color, Tag):
        # Look for the background-color declaration in the Tag's inline
        # style. Whitespace around the property and value is ignored and the
        # property name is matched case-insensitively, so ordinary CSS such
        # as 'color: red; background-color: blue' is accepted.
        for declaration in color.attrs.get('style', '').split(';'):
            prop, colon, value = declaration.partition(':')
            if colon and prop.strip().lower() == 'background-color':
                color = value.strip()
                break
        else:
            raise ValueError(
                'background color name should not be a Tag, '
                'or should contain a \'style\' attribute with '
                'a \'background-color\' field'
            )
    if color is None or color == '':
        color = 'initial'
    span = Tag(name='span', attrs={'style': 'background-color:{}'.format(color)})
    if content is not None:
        span.append(content)
    return span
if __name__ == '__main__':
    # Running the module directly executes its doctests with verbose output.
    from doctest import testmod
    testmod(verbose=True)
import tornado.ioloop
import tornado.web
import htmlboilerplate as html
class MainHandler(tornado.web.RequestHandler):
    """Request handler that answers GET with a small demo page.

    The page is written as a sequence of string fragments produced by the
    ``htmlboilerplate`` helpers.
    """

    def get(self):
        """Write the doctype, head, a demo table and the closing tags."""
        self.set_status(200)
        # html.doctype() also emits the opening <html> tag.
        self.write(str(html.doctype()))
        self.write(str(html.headtitle(
            'This is an example title'
        )))
        # Open the body explicitly so the closing '</body></html>' written
        # at the end is balanced.
        self.write('<body>')

        def getAnotherRow():
            # Every cell is a coloured <span>; two wrap a link, one wraps
            # text full of characters that need escaping.
            return [
                html.bgcolor('red', html.link(self.application.reverse_url('foo'), 'foo')),
                html.bgcolor('blue', '<"oh" & snap!\'>'),
                html.bgcolor('black', html.link('http://example.com/', 'foo & bar')),
            ]

        self.write(str(html.table(
            ['col1header', 'col2header', 'col3header'],
            [
                ['row1col1', 'row1col2', 'row1col3'],
                getAnotherRow(),
            ]
        )))
        self.write('</body></html>')
def make_app():
    """Return the tornado Application for the demo.

    Two routes with the same catch-all pattern are registered for
    ``MainHandler``, named ``'foo'`` and ``'bar'``. ``MainHandler.get`` only
    looks up ``'foo'`` via ``reverse_url``.
    """
    routes = [
        (r"/.*", MainHandler, {}, 'foo'),
        (r"/.*", MainHandler, {}, 'bar'),
    ]
    return tornado.web.Application(routes)
if __name__ == "__main__":
    # Script entry point: build the app, listen on port 8888 and run the
    # tornado IO loop until the process is stopped.
    app = make_app()
    app.listen(8888)
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment