Skip to content

Instantly share code, notes, and snippets.

@etrepum
Created February 27, 2009 03:43
Show Gist options
  • Save etrepum/71271 to your computer and use it in GitHub Desktop.
Save etrepum/71271 to your computer and use it in GitHub Desktop.
tal2genshi.py
#!/usr/bin/env python
"""
Translate the subset of TAL that MochiBot uses to Genshi
* tal:attributes
* tal:content
* tal:replace
* tal:repeat
* tal:condition
Special values used by MochiBot:
* default
* repeat/*/odd (only used to produce "" or "1")
Edge cases
* some templates use ${...} in attributes and elsewhere, but I think
this case is covered
* structure can be tricky, but I think Markup is equivalent enough
* the "repeat" object doesn't really exist in genshi but it's only used to
do alternating table rows and we can do that with a helper
* "| default" is especially hard outside of attributes,
so the templates were unrolled
References:
* http://phptal.motion-twin.com/manual/en/
* http://wiki.zope.org/ZPT/TALESSpecification13
* http://genshi.edgewall.org/wiki/Documentation/0.5.x/xml-templates.html
"""
import os
import sys
from genshi import XML, Namespace, QName, Attrs
# XML namespaces used by the translation: `tal` is the vocabulary looked for
# in the input; `py` (Genshi directives) and `xi` (XInclude) are the
# namespaces that TALGenshiFilter.tal_ns declares in place of the TAL one.
tal = Namespace(u'http://xml.zope.org/namespaces/tal')
py = Namespace(u'http://genshi.edgewall.org/')
xi = Namespace(u'http://www.w3.org/2001/XInclude')
def tal_attr(expr, attrs):
    """
    Translate a tal:attributes value into the text of a dict literal

    expr is a ';'-separated list of "name expression" entries and attrs is
    passed through to tal_attr_subexpr for each entry.  Blank entries (for
    example after a trailing ';') are ignored.  Returns a string such as
    "{'href': here.url}"; an expr with no entries gives "{}".
    """
    # TAL has an escaped form of ';' but none of our documents use it, so
    # a plain split is all the tokenizing that is needed
    pairs = []
    for chunk in expr.split(';'):
        chunk = chunk.strip()
        if chunk:
            pairs.append(tal_attr_subexpr(chunk, attrs))
    return '{%s}' % ', '.join(pairs)
def tal_slash(expr):
    """
    Translate one TALES expression into Python expression text

    Handled forms, checked in this order:
    * "string:..."     -> handed to tal_attr_string
    * "repeat/*/odd"   -> "h.textcycle('', '1')"
    * "not:X"          -> "not " + translation of X
    * "exists:X"       -> the translation of X when it contains a '.',
                          otherwise 'defined("X")'
    * "structure X"    -> "Markup(" + translation of X + ")"
    * anything else    -> the path with every '/' replaced by '.'

    Surrounding whitespace is ignored.  It is stripped before the prefix
    checks so that a value such as " not:a/b " is still recognised instead
    of falling through to the plain path case.
    """
    expr = expr.strip()
    if expr.startswith('string:'):
        return tal_attr_string(expr[len('string:'):].strip())
    if expr.startswith('repeat/') and expr.endswith('/odd'):
        # This is a gross hack but it should work since we don't nest
        # any of the repeat clauses
        return "h.textcycle('', '1')"
    if expr.startswith('not:'):
        return 'not ' + tal_slash(expr[len('not:'):])
    if expr.startswith('exists:'):
        var = tal_slash(expr[len('exists:'):])
        if '.' in var:
            # dotted path: emitted as-is rather than wrapped in defined()
            return var
        return 'defined("' + var + '")'
    if expr.startswith('structure '):
        return 'Markup(' + tal_slash(expr[len('structure '):]) + ')'
    return expr.replace('/', '.')
def tokenize_var(expr):
    """
    Split text containing ${...} references into (kind, data) pairs

    Yields ('string', text) for literal runs and ('var', name) for the
    contents of each ${name}.  Once no complete ${...} remains, whatever
    is left (including an unterminated "${") is yielded as one final
    'string'.  An empty expr yields nothing.
    """
    pos = 0
    size = len(expr)
    while pos < size:
        start = expr.find('${', pos)
        stop = expr.find('}', start) if start != -1 else -1
        if stop == -1:
            # no further complete reference: the tail is literal text
            yield 'string', expr[pos:]
            return
        if start > pos:
            yield 'string', expr[pos:start]
        yield 'var', expr[start + 2:stop]
        pos = stop + 1
def tal_attr_string(expr):
    """
    Transform "string: foo${bar}"

    expr is the text after the "string:" prefix.  Literal runs become
    quoted Python string literals, each ${...} reference is translated
    with tal_slash, and the pieces are joined with ' + '.

    An empty expr returns the literal "''" so that the caller always gets
    a valid Python expression (joining zero pieces would give an empty
    one).
    """
    if not expr:
        return "''"
    pieces = []
    for kind, data in tokenize_var(expr):
        if kind == 'string':
            # repr() quotes the text; lstrip drops the u'' prefix that
            # Python 2 puts in front of unicode literals
            pieces.append(repr(data).lstrip('u'))
        elif kind == 'var':
            pieces.append(tal_slash(data))
    return ' + '.join(pieces)
def tal_attr_subexpr(subexpr, attrs):
    """
    Translate one "name expression" entry of a tal:attributes value

    Returns the text of a single dict item, "'name': <python expr>".  When
    the expression ends in "| default" and attrs already has a value for
    name, that value is appended as " or '<value>'"; without an existing
    value the "| default" suffix is simply dropped.

    Raises ValueError if subexpr does not contain both a name and an
    expression.
    """
    # NOTE(review): the UTF-8 encode looks intended to make repr() below
    # emit plain '...' literals on Python 2 -- confirm before porting
    subexpr = subexpr.encode('utf-8')
    fields = subexpr.split(None, 1)
    if len(fields) != 2:
        # Previously surfaced as an opaque tuple-unpacking error
        raise ValueError(
            'tal:attributes entry must be "name expression", got %r'
            % (subexpr,))
    name, expr = fields
    expr = expr.strip()
    # Always used as exactly "| default"
    fallback = ''
    if expr.endswith('| default'):
        expr = expr[:-len('| default')].strip()
        value = attrs.get(name)
        if value is not None:
            fallback = ' or ' + repr(value).lstrip('u')
    return repr(name) + ': ' + tal_slash(expr) + fallback
class BaseTALFilter(object):
    """
    Stream filter that locates TAL constructs and passes them to hooks

    Called with an iterable of (kind, data, pos) events, it yields events
    in which
    * every attribute of a START event has gone through tal_attr,
    * TEXT containing "${" has gone through tal_inline,
    * the START_NS event that declares the "tal" prefix is replaced by
      whatever tal_ns yields,
    and everything else is passed along unchanged.

    The hooks in this base class (tal_ns, tal_translate,
    tal_translate_inline) change nothing, so the output matches the input
    while self.xforms collects each TAL construct that was seen.
    """

    def __init__(self):
        # ((attr, value), result) for TAL attributes and
        # (('INLINE', expr), ('INLINE', result)) for ${...} references
        self.xforms = []

    def __call__(self, stream):
        for kind, data, pos in stream:
            if kind == 'START':
                tag, attrs = data
                data = tag, Attrs(self.tal_attr(attr, attrs) for attr in attrs)
            elif kind == 'TEXT' and '${' in data:
                data = self.tal_inline(data)
            elif kind == 'START_NS' and data == ('tal', tal.uri):
                # the hook decides what replaces the TAL declaration
                for kdp in self.tal_ns(kind, data, pos):
                    yield kdp
                continue
            yield kind, data, pos

    def tal_ns(self, kind, data, pos):
        """
        Hook: yield the event(s) that replace the TAL namespace declaration
        """
        yield kind, data, pos

    def tal_attr(self, attr, attrs):
        """
        Return the (name, value) pair that replaces one attribute

        attr is a (name, value) pair and attrs is the full attribute list
        of the element.  The pair is unpacked here instead of in the
        signature because tuple parameters are a syntax error on
        Python 3.
        """
        qa, value = attr
        if isinstance(qa, QName) and qa.namespace == tal.uri:
            rval = self.tal_translate(qa, value, attrs)
            self.xforms.append(((qa.localname, value), rval))
            return rval
        elif '${' in value:
            return qa, self.tal_inline(value)
        else:
            return qa, value

    def tal_inline(self, expr):
        """
        Transform "foo${bar}" inside plain HTML text or attributes
        """
        lst = []
        for kind, data in tokenize_var(expr):
            if kind == 'string':
                lst.append(data)
            elif kind == 'var':
                rval = self.tal_translate_inline(data)
                self.xforms.append((('INLINE', data), ('INLINE', rval)))
                lst.append('${' + rval + '}')
        return ''.join(lst)

    def tal_translate(self, qa, value, attrs):
        """
        Hook: translate a TAL attribute; returns it unchanged here
        """
        return (qa, value)

    def tal_translate_inline(self, data):
        """
        Hook: translate the inside of a ${...}; returns it unchanged here
        """
        return data
class TALGenshiFilter(BaseTALFilter):
    """
    BaseTALFilter whose hooks rewrite TAL as Genshi directives

    Each tal:<name> attribute is dispatched to the t_<name> method, which
    returns the replacing (py:<directive>, expression) pair.  Only
    attributes, content, condition, replace and repeat have a handler;
    any other TAL attribute raises AttributeError from tal_translate.
    """

    def tal_ns(self, kind, data, pos):
        # declare the py and xi namespaces where tal was declared
        yield kind, ('py', py.uri), pos
        yield kind, ('xi', xi.uri), pos

    def tal_translate(self, qa, value, attrs):
        return getattr(self, 't_' + qa.localname)(value, attrs)

    def t_attributes(self, value, attrs):
        return py['attrs'], tal_attr(value, attrs)

    def t_content(self, value, attrs):
        return py['content'], tal_slash(value)

    def t_condition(self, value, attrs):
        return py['if'], tal_slash(value)

    def t_replace(self, value, attrs):
        if not value.strip():
            # tal:replace="" translates better to py:if="False"
            return py['if'], "False"
        else:
            return py['replace'], tal_slash(value)

    def t_repeat(self, value, attrs):
        # Split on any run of whitespace: splitting on the first literal
        # space turned a value with leading whitespace into an empty
        # loop variable
        key, expr = value.split(None, 1)
        return py['for'], '%s in %s' % (key, tal_slash(expr.strip()))

    def tal_translate_inline(self, value):
        return tal_slash(value)
def main(files, debug=False):
    """
    Translate each TAL template in files to Genshi, overwriting it in place

    files -- iterable of template paths
    debug -- when true, also print every tal -> py rewrite that was made
             (this dump used to sit behind a hard-coded "if 0")

    For each path:
    * it is skipped unless "<path>.php" exists (a warning is printed if
      the skipped file mentions the TAL namespace);
    * the content must decode as UTF-8, otherwise the path is printed and
      the UnicodeDecodeError propagates;
    * a bare "<html>" tag gets the TAL namespace declaration added, and a
      file without the namespace that is a single comment is skipped;
    * an iso-8859-1 Content-Type meta tag is switched to utf-8;
    * the document is run through TALGenshiFilter and written back.

    Every file is opened in a with block so handles are closed promptly
    (requires Python 2.6+).  The print calls use a single pre-formatted
    string so the output is identical on Python 2 and Python 3.
    """
    TAL_HTML = '<html xmlns:tal="http://xml.zope.org/namespaces/tal">'
    LAME = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />'
    for fn in files:
        with open(fn, 'rb') as f:
            doc = f.read()
        # NOTE(review): the pasted source lost its indentation; the nesting
        # here assumes the `continue` applies to every file that has no
        # .php companion, not only to those that trigger the warning.
        if not os.path.exists(fn + '.php'):
            if tal.uri.encode('utf-8') in doc:
                print('WARNING TAL ns in plain doc: %s' % (fn,))
            continue
        try:
            doc = doc.decode('utf-8')
        except UnicodeDecodeError:
            print('ERROR: %s' % (fn,))
            raise
        if tal.uri not in doc:
            if '<html>' in doc:
                print('FIXED TAL ns: %s' % (fn,))
                doc = doc.replace('<html>', TAL_HTML)
                with open(fn, 'wb') as f:
                    f.write(doc.encode('utf-8'))
            else:
                stripped = doc.strip()
                if stripped.startswith('<!--') and stripped.endswith('-->'):
                    continue
                print('MISSING TAL ns: %s' % (fn,))
        else:
            print(fn)
        if LAME in doc:
            print('FIXED iso-8859-1 lameness: %s' % (fn,))
            doc = doc.replace(LAME, LAME.replace('charset=iso-8859-1', 'charset=utf-8'))
            with open(fn, 'wb') as f:
                f.write(doc.encode('utf-8'))
        xform = TALGenshiFilter()
        # Ask for UTF-8 bytes explicitly: the result goes to a binary file
        # and must not depend on the library's default encoding.
        d1 = XML(doc).filter(xform).render(encoding='utf-8')
        with open(fn, 'wb') as f:
            f.write(d1)
        if debug and xform.xforms:
            for old, new in xform.xforms:
                attr, value = old
                print('tal:%s = %r' % (attr, value.encode('utf-8')))
                nattr, nvalue = new
                # QName results carry .localname; 'INLINE' is a plain string
                nname = getattr(nattr, 'localname', nattr)
                print('py:%s = %r' % (nname, nvalue.encode('utf-8')))
            print('')
def main_old(files):
    """
    Survey the TAL expressions used by the templates in files

    Applies the same file selection and clean-up as main() (skip paths
    without "<path>.php", require UTF-8, add a missing TAL namespace to a
    bare "<html>", switch the iso-8859-1 meta tag to utf-8 -- the two
    fixes are written back to the file), then runs each document through
    BaseTALFilter, which changes nothing but records what it sees.  The
    distinct (attribute, value) pairs are printed, sorted, at the end.

    Every file is opened in a with block so handles are closed promptly
    (requires Python 2.6+).  The print calls use a single pre-formatted
    string so the output is identical on Python 2 and Python 3.
    """
    TAL_HTML = '<html xmlns:tal="http://xml.zope.org/namespaces/tal">'
    LAME = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />'
    all_expressions = set()
    for fn in files:
        with open(fn, 'rb') as f:
            doc = f.read()
        # NOTE(review): the pasted source lost its indentation; the nesting
        # here assumes the `continue` applies to every file that has no
        # .php companion, not only to those that trigger the warning.
        if not os.path.exists(fn + '.php'):
            if tal.uri.encode('utf-8') in doc:
                print('WARNING TAL ns in plain doc: %s' % (fn,))
            continue
        try:
            doc = doc.decode('utf-8')
        except UnicodeDecodeError:
            print('ERROR: %s' % (fn,))
            raise
        if tal.uri not in doc:
            if '<html>' in doc:
                print('FIXED TAL ns: %s' % (fn,))
                doc = doc.replace('<html>', TAL_HTML)
                with open(fn, 'wb') as f:
                    f.write(doc.encode('utf-8'))
            else:
                stripped = doc.strip()
                if stripped.startswith('<!--') and stripped.endswith('-->'):
                    continue
                print('MISSING TAL ns: %s' % (fn,))
        else:
            print(fn)
        if LAME in doc:
            print('FIXED iso-8859-1 lameness: %s' % (fn,))
            doc = doc.replace(LAME, LAME.replace('charset=iso-8859-1', 'charset=utf-8'))
            with open(fn, 'wb') as f:
                f.write(doc.encode('utf-8'))
        xform = BaseTALFilter()
        # Rendering is only done to pull the stream through the filter so
        # that xform.xforms gets populated; the output itself is discarded.
        XML(doc).filter(xform).render(encoding='utf-8')
        if xform.xforms:
            for old, new in xform.xforms:
                all_expressions.add(old)
            print('')
    for expr in sorted(all_expressions):
        print('%s = %r' % tuple(s.encode('utf-8') for s in expr))
if __name__ == '__main__':
    # Script entry point: every command-line argument is a template path
    # that main() converts in place.
    files = sys.argv[1:]
    main(files)
    #main_old(files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment