Created
February 27, 2009 03:43
-
-
Save etrepum/71271 to your computer and use it in GitHub Desktop.
tal2genshi.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Translate the subset of TAL that MochiBot uses to Genshi

* tal:attributes
* tal:content
* tal:replace
* tal:repeat
* tal:condition

Special values used by MochiBot:

* default
* repeat/*/odd (only used to produce "" or "1")

Edge cases:

* some templates use ${...} in attributes and elsewhere, but I think
  this case is covered
* structure can be tricky, but I think Markup is equivalent enough
* the "repeat" object doesn't really exist in Genshi, but it's only used to
  do alternating table rows and we can do that with a helper
* "| default" is especially hard outside of attributes,
  so the templates were unrolled

References:

* http://phptal.motion-twin.com/manual/en/
* http://wiki.zope.org/ZPT/TALESSpecification13
* http://genshi.edgewall.org/wiki/Documentation/0.5.x/xml-templates.html
""" | |
import os | |
import sys | |
from genshi import XML, Namespace, QName, Attrs | |
tal = Namespace(u'http://xml.zope.org/namespaces/tal') | |
py = Namespace(u'http://genshi.edgewall.org/') | |
xi = Namespace(u'http://www.w3.org/2001/XInclude') | |
def tal_attr(expr, attrs):
    """
    Turn a tal:attributes value into the source of a Python dict literal.

    expr is the raw attribute value: "name expression" entries separated
    by ';'.  attrs is the element's attribute collection; it is only passed
    through to tal_attr_subexpr, which produces one "'name': expression"
    entry per non-empty piece.
    """
    # TAL has an escaped form of ';', but none of the documents use it, so
    # a plain split is a good enough tokenizer.
    entries = []
    for chunk in expr.split(';'):
        chunk = chunk.strip()
        if not chunk:
            # Empty pieces come from trailing or doubled separators.
            continue
        entries.append(tal_attr_subexpr(chunk, attrs))
    body = ', '.join(entries)
    return '{' + body + '}'
def tal_slash(expr):
    """
    Translate one TALES expression into the text of a Python expression.

    Handled forms, checked in this order:

    * "string:..."       -> delegated to tal_attr_string
    * "repeat/<x>/odd"   -> "h.textcycle('', '1')"
    * "not:<expr>"       -> "not <translated expr>"
    * "exists:<expr>"    -> 'defined("<name>")' for a bare name, or the
                            translated dotted path itself
    * "structure <expr>" -> "Markup(<translated expr>)"
    * anything else      -> path expression with '/' replaced by '.'

    Surrounding whitespace is removed before the prefix checks, so values
    taken verbatim from an attribute (e.g. " structure foo/bar") are still
    recognised instead of falling through to the path branch.
    """
    expr = expr.strip()
    if expr.startswith('string:'):
        return tal_attr_string(expr[len('string:'):].strip())
    if expr.startswith('repeat/') and expr.endswith('/odd'):
        # This is a gross hack but it should work since we don't nest
        # any of the repeat clauses.
        # NOTE(review): assumes a helper object "h" with textcycle() is
        # available to the rendered templates -- confirm.
        return "h.textcycle('', '1')"
    if expr.startswith('not:'):
        return 'not ' + tal_slash(expr[len('not:'):])
    if expr.startswith('exists:'):
        var = tal_slash(expr[len('exists:'):])
        if '.' in var:
            # A dotted path is emitted as-is rather than wrapped in
            # defined(), which is only used for a bare name.
            return var
        return 'defined("' + var + '")'
    if expr.startswith('structure '):
        return 'Markup(' + tal_slash(expr[len('structure '):]) + ')'
    return expr.replace('/', '.')
def tokenize_var(expr):
    """
    Split text containing ${...} placeholders into (kind, text) pairs.

    Yields ('string', literal) for literal runs and ('var', inner) for the
    text between '${' and the next '}'.  When no complete placeholder is
    left, the remaining text is yielded as one 'string' token.  An empty
    input yields nothing.
    """
    remaining = expr
    while remaining:
        start = remaining.find('${')
        stop = remaining.find('}', start) if start >= 0 else -1
        if start < 0 or stop < 0:
            # No opening marker, or an opening marker that is never closed:
            # the rest is plain text.
            yield 'string', remaining
            return
        if start > 0:
            yield 'string', remaining[:start]
        yield 'var', remaining[start + 2:stop]
        remaining = remaining[stop + 1:]
def tal_attr_string(expr):
    """
    Transform the body of a "string: foo${bar}" expression into a Python
    concatenation such as "'foo' + bar".

    Literal runs become quoted string literals (the Python 2 "u" prefix of
    repr() is dropped) and each ${...} placeholder is translated with
    tal_slash.  An empty body yields the empty-string literal "''" so the
    result is always a usable expression.
    """
    pieces = []
    for kind, data in tokenize_var(expr):
        if kind == 'string':
            pieces.append(repr(data).lstrip('u'))
        elif kind == 'var':
            pieces.append(tal_slash(data))
    if not pieces:
        # "string:" with nothing after it: joining zero pieces would emit no
        # expression at all and break the surrounding generated code.
        return "''"
    return ' + '.join(pieces)
def tal_attr_subexpr(subexpr, attrs):
    """
    Translate one "name expression" entry of tal:attributes into a
    "'name': expression" dict-literal entry.

    subexpr is one ';'-separated piece of the attribute value; attrs is the
    element's attribute collection and is consulted with attrs.get(name)
    for the "| default" fallback.

    Raises ValueError when the entry has no expression after the name.
    """
    subexpr = subexpr.encode('utf-8')
    # Split on any run of whitespace so a tab or newline between the name
    # and the expression works as well as a single space.
    pieces = subexpr.split(None, 1)
    if len(pieces) != 2:
        raise ValueError(
            'tal:attributes entry must be "name expression": %r' % (subexpr,))
    name, expr = pieces
    expr = expr.strip()
    # Always used as exactly "| default"
    default = ''
    if expr.endswith('| default'):
        expr = expr[:-len('| default')].strip()
        value = attrs.get(name)
        if value is not None:
            # Fall back to the literal value the element already has for
            # this attribute when the expression is falsy.
            default = ' or ' + repr(value).lstrip('u')
    return repr(name) + ': ' + tal_slash(expr) + default
class BaseTALFilter(object):
    """
    Stream filter that finds TAL constructs and records them.

    Instances are called with an iterable of (kind, data, pos) events and
    yield events back.  This base class leaves every construct unchanged
    (tal_translate and tal_translate_inline are identity hooks) but logs
    each one in self.xforms; subclasses override the hooks to rewrite them.
    """

    def __init__(self):
        # Pairs of ((tal attribute name or 'INLINE', original text),
        # translated result), in the order they were met.
        self.xforms = []

    def __call__(self, stream):
        """Filter the event stream, yielding (kind, data, pos) events."""
        for kind, data, pos in stream:
            if kind == 'START':
                tag, attrs = data
                data = tag, Attrs(self.tal_attr(attr, attrs) for attr in attrs)
            elif kind == 'TEXT' and '${' in data:
                data = self.tal_inline(data)
            elif kind == 'START_NS' and data == ('tal', tal.uri):
                # The tal namespace declaration is replaced by whatever
                # tal_ns yields instead of being passed through.
                for kdp in self.tal_ns(kind, data, pos):
                    yield kdp
                continue
            yield kind, data, pos

    def tal_ns(self, kind, data, pos):
        """Yield the event(s) replacing the tal namespace declaration."""
        yield kind, data, pos

    def tal_attr(self, item, attrs):
        """
        Process one (name, value) attribute pair of an element.

        item is the (name, value) pair; attrs is the complete attribute
        collection of the same element.  Returns the possibly rewritten
        (name, value) pair.
        """
        # Unpacked here rather than in the signature: tuple parameters are
        # Python 2 only syntax.
        qa, value = item
        if isinstance(qa, QName) and qa.namespace == tal.uri:
            rval = self.tal_translate(qa, value, attrs)
            self.xforms.append(((qa.localname, value), rval))
            return rval
        elif '${' in value:
            return qa, self.tal_inline(value)
        else:
            return qa, value

    def tal_inline(self, expr):
        """
        Transform "foo${bar}" inside plain HTML text or attributes
        """
        lst = []
        for kind, data in tokenize_var(expr):
            if kind == 'string':
                lst.append(data)
            elif kind == 'var':
                rval = self.tal_translate_inline(data)
                self.xforms.append((('INLINE', data), ('INLINE', rval)))
                lst.append('${' + rval + '}')
        return ''.join(lst)

    def tal_translate(self, qa, value, attrs):
        """Hook: translate one tal:* attribute.  Identity in the base class."""
        return (qa, value)

    def tal_translate_inline(self, data):
        """Hook: translate the inside of one ${...}.  Identity here."""
        return data
class TALGenshiFilter(BaseTALFilter):
    """
    Filter that rewrites TAL attributes and inline expressions as Genshi.

    Each tal:<name> attribute is dispatched to the method t_<name>, which
    returns the replacement (qualified name, value) pair.
    """

    def tal_ns(self, kind, data, pos):
        """Replace the tal namespace declaration with the py and xi ones."""
        for prefix, namespace in (('py', py), ('xi', xi)):
            yield kind, (prefix, namespace.uri), pos

    def tal_translate(self, qa, value, attrs):
        """Look up and call the t_<localname> handler for this attribute."""
        handler = getattr(self, 't_' + qa.localname)
        return handler(value, attrs)

    def t_attributes(self, value, attrs):
        """tal:attributes -> py:attrs with a dict-literal expression."""
        return py['attrs'], tal_attr(value, attrs)

    def t_content(self, value, attrs):
        """tal:content -> py:content."""
        return py['content'], tal_slash(value)

    def t_condition(self, value, attrs):
        """tal:condition -> py:if."""
        return py['if'], tal_slash(value)

    def t_replace(self, value, attrs):
        """tal:replace -> py:replace, or py:if="False" for an empty value."""
        if value.strip():
            return py['replace'], tal_slash(value)
        # tal:replace="" translates better to py:if="False"
        return py['if'], "False"

    def t_repeat(self, value, attrs):
        """tal:repeat "name expr" -> py:for "name in expr"."""
        key, expr = value.split(' ', 1)
        iterable = tal_slash(expr.strip())
        return py['for'], '%s in %s' % (key, iterable)

    def tal_translate_inline(self, value):
        """Translate the inside of one ${...} placeholder."""
        return tal_slash(value)
def main(files, show_xforms=False):
    """
    Convert each template in files from TAL to Genshi, in place.

    A file is only treated as a template when a sibling "<name>.php" exists;
    other files are skipped, with a warning if they contain the TAL
    namespace URI.  Templates are decoded as UTF-8, given the TAL namespace
    declaration on a bare <html> tag if it is missing, have an iso-8859-1
    meta charset switched to utf-8, and are then run through
    TALGenshiFilter and written back.

    show_xforms: when true, print every recorded translation as a
    "tal:..." / "py:..." pair after each file.

    Raises UnicodeDecodeError (after printing the file name) for a template
    that is not valid UTF-8.
    """
    for fn in files:
        with open(fn, 'rb') as f:
            doc = f.read()
        if not os.path.exists(fn + '.php'):
            if tal.uri.encode('utf-8') in doc:
                print('WARNING TAL ns in plain doc: %s' % fn)
            continue
        try:
            doc = doc.decode('utf-8')
        except UnicodeDecodeError:
            print('ERROR: %s' % fn)
            raise
        if tal.uri not in doc:
            if '<html>' in doc:
                print('FIXED TAL ns: %s' % fn)
                doc = doc.replace('<html>', '<html xmlns:tal="http://xml.zope.org/namespaces/tal">')
                # Saved right away, before the parse below is attempted.
                with open(fn, 'wb') as f:
                    f.write(doc.encode('utf-8'))
            else:
                stripped = doc.strip()
                if stripped.startswith('<!--') and stripped.endswith('-->'):
                    # The whole file is a single comment: nothing to convert.
                    continue
                print('MISSING TAL ns: %s' % fn)
        else:
            print(fn)
        LAME = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />'
        if LAME in doc:
            print('FIXED iso-8859-1 lameness: %s' % fn)
            doc = doc.replace(LAME, LAME.replace('charset=iso-8859-1', 'charset=utf-8'))
            with open(fn, 'wb') as f:
                f.write(doc.encode('utf-8'))
        xform = TALGenshiFilter()
        d1 = XML(doc).filter(xform).render()
        with open(fn, 'wb') as f:
            f.write(d1)
        if show_xforms:
            for old, new in xform.xforms:
                attr, value = old
                print('tal:%s = %r' % (attr, value.encode('utf-8')))
                nattr, nvalue = new
                # Attribute translations carry a qualified name; inline ones
                # carry the plain string 'INLINE'.
                try:
                    nname = nattr.localname
                except AttributeError:
                    nname = nattr
                print('py:%s = %r' % (nname, nvalue.encode('utf-8')))
                print('')
def main_old(files):
    """
    Survey pass: collect every distinct TAL expression used in files.

    Applies the same file selection and in-place fix-ups as the converter
    (TAL namespace on a bare <html> tag, iso-8859-1 meta charset -> utf-8),
    then runs the identity BaseTALFilter over each template purely to
    record what it finds.  Finally prints the sorted, de-duplicated
    "name = value" list.

    Raises UnicodeDecodeError (after printing the file name) for a template
    that is not valid UTF-8.
    """
    all_expressions = set()
    for fn in files:
        with open(fn, 'rb') as f:
            doc = f.read()
        if not os.path.exists(fn + '.php'):
            if tal.uri.encode('utf-8') in doc:
                print('WARNING TAL ns in plain doc: %s' % fn)
            continue
        try:
            doc = doc.decode('utf-8')
        except UnicodeDecodeError:
            print('ERROR: %s' % fn)
            raise
        if tal.uri not in doc:
            if '<html>' in doc:
                print('FIXED TAL ns: %s' % fn)
                doc = doc.replace('<html>', '<html xmlns:tal="http://xml.zope.org/namespaces/tal">')
                with open(fn, 'wb') as f:
                    f.write(doc.encode('utf-8'))
            else:
                stripped = doc.strip()
                if stripped.startswith('<!--') and stripped.endswith('-->'):
                    # The whole file is a single comment: nothing to survey.
                    continue
                print('MISSING TAL ns: %s' % fn)
        else:
            print(fn)
        LAME = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />'
        if LAME in doc:
            print('FIXED iso-8859-1 lameness: %s' % fn)
            doc = doc.replace(LAME, LAME.replace('charset=iso-8859-1', 'charset=utf-8'))
            with open(fn, 'wb') as f:
                f.write(doc.encode('utf-8'))
        xform = BaseTALFilter()
        # The rendered output is discarded; rendering is what drives the
        # stream through the filter so that xform.xforms gets populated.
        XML(doc).filter(xform).render()
        if xform.xforms:
            all_expressions.update(old for old, new in xform.xforms)
            print('')
    for expr in sorted(all_expressions):
        print('%s = %r' % tuple(s.encode('utf-8') for s in expr))
if __name__ == '__main__':
    # Every command-line argument is the path of a template to convert.
    # Call main_old(files) here instead to only survey the expressions.
    files = sys.argv[1:]
    main(files)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment