Skip to content

Instantly share code, notes, and snippets.

@jstimpfle
Last active April 19, 2017 18:05
Show Gist options
  • Save jstimpfle/a4f2661f8d042d9862b9fecdd85a7c93 to your computer and use it in GitHub Desktop.
Save jstimpfle/a4f2661f8d042d9862b9fecdd85a7c93 to your computer and use it in GitHub Desktop.
Replace magic HTML tags with the result of Python calls.
"""Replace magic HTML tags with the result of function calls.
For now, only self-closing tags (<TAGNAME .../>) are supported. This is for
simplicity and also because otherwise we'd have to decide how to check for
proper nesting and how to handle body arguments (lazy or strict call order, ...?).
We don't even try to parse valid HTML here. We're just looking for
the next occurrence of "<TAGNAME" for any given TAGNAME.
This is easy to implement and better for performance.
Unfortunately that also means that things like
<p someattr="someval" someotherattr="foo <TAGNAME bar="baz"/>">
get replaced as well, which might be undesirable.
"""
import io
import re
class Invalid(Exception):
    """Raised by htmltagsreplace when a magic tag cannot be processed.

    That covers a repeated attribute, a missing ``/>`` close sequence, and a
    tag function that returns something other than a string.
    """
def htmltagsreplace(fdict, html):
    """Replace self-closing magic tags in *html* with function results.

    Every occurrence of ``<TAGNAME attr="value" ... />`` whose TAGNAME is a
    key of *fdict* is replaced by the return value of
    ``fdict[TAGNAME](attr="value", ...)``. All other text is copied through
    unchanged. Attribute values are passed as-is (HTML entities are not
    decoded).

    Args:
        fdict: dict mapping alphanumeric tag names to callables. Each
            callable is invoked with the tag's attributes as keyword
            arguments and must return a str.
        html: the text to scan.

    Returns:
        The text with every magic tag substituted. If *fdict* is empty or
        no magic tag occurs, *html* itself is returned.

    Raises:
        Invalid: if an attribute is used twice in one tag, if the tag is not
            terminated by ``/>``, or if a tag function returns a non-string.
    """
    assert isinstance(fdict, dict)
    assert isinstance(html, str)
    assert all(tagname.isalnum() for tagname in fdict)
    if not fdict:
        # The tag pattern below needs at least one name in its alternation.
        return html

    # Compile once; matching with an explicit start position avoids copying
    # the remainder of the document for every attribute.
    tag_re = re.compile(r'<(%s)\b' % ('|'.join(re.escape(key) for key in fdict)))
    attr_re = re.compile(r'\s+([a-zA-Z_][a-zA-Z0-9_-]*)="([^"]*)"')
    close_re = re.compile(r'\s*/>')

    out = io.StringIO()
    i = 0  # index of the first character of html not yet written or consumed
    for tag in tag_re.finditer(html):
        if tag.start() < i:
            # finditer resumes right after "<TAGNAME", so it also reports
            # "<TAGNAME" occurrences inside the attribute values of the magic
            # tag we just consumed. Those are data of that tag, not new tags.
            continue
        out.write(html[i:tag.start()])
        tagname = tag.group(1)
        i = tag.end()

        attrs = {}
        while True:
            attr = attr_re.match(html, i)
            if attr is None:
                break
            key = attr.group(1)
            val = attr.group(2)  # XXX: want to replace HTML entities?
            if key in attrs:
                raise Invalid('Attribute "%s" used multiple times in magic tag <%s ...>' %(key, tagname))
            attrs[key] = val
            i = attr.end()

        close = close_re.match(html, i)
        if close is None:
            raise Invalid('Missing close sequence (/>) in use of magic tag <%s ...>' %(tagname,))
        i = close.end()

        result = fdict[tagname](**attrs)
        if not isinstance(result, str):
            raise Invalid('Call to magic tag function <%s ...> did not return a string: %s' %(tagname, result))
        out.write(result)

    if i == 0:
        # Nothing was replaced; hand back the input without copying it.
        return html
    out.write(html[i:])
    return out.getvalue()
def testit():
    """Run htmltagsreplace on a small sample document and print the output."""
    import datetime

    def render_foo(**args):
        # 'bar' is required; 'baz' is optional and shows up as "None" if absent.
        return 'FOO' + args['bar'] + str(args.get('baz'))

    def render_date():
        return str(datetime.datetime.now())

    handlers = {'foo': render_foo, 'date': render_date}
    sample = """
<p>abc<foo bar="42" baz="43" /></p>
<p><foo bar="xyz"/></p>
<p>Date: <date/></p>
"""
    print(htmltagsreplace(handlers, sample))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    testit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment