Skip to content

Instantly share code, notes, and snippets.

@weaver
Created February 6, 2010 05:01
Show Gist options
  • Save weaver/296549 to your computer and use it in GitHub Desktop.
Save weaver/296549 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""ply-path -- analysis of an path expression using PLY
Example: ply-path.py 'child::para[position()>1]'
"""
import sys
from ply import lex
def usage():
    """Print the module docstring and a usage line, then exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(__doc__)
    print('usage: %s expression' % sys.argv[0])
    sys.exit(1)
def expression():
    """Build a PLY lexer for a subset of the XPath expression tokens.

    Subset of <http://www.w3.org/TR/xpath/#exprlex>.

    The rules are declared as locals of this function and ``lex.lex()`` is
    called from here, so the names ``tokens`` and ``t_*`` (and the regex
    docstrings of the rule functions) are part of the lexer definition and
    must keep their exact spelling.

    Returns:
        The lexer object produced by ``lex.lex()``.
    """
    tokens = ['BRACKET', 'ABBR', 'NUM', 'LITERAL', 'VAR', 'NAME']

    # Words that the NAME rule re-tags as a more specific token type.
    reserved = {
        'ancestor': 'AXIS',
        'attribute': 'AXIS',
        'child': 'AXIS',
        'descendant': 'AXIS',
        'parent': 'AXIS',
        'and': 'OP',
        'or': 'OP',
        'mod': 'OP',
        'div': 'OP',
    }
    # values() (rather than the Python 2-only itervalues()) works on both
    # major versions; sorted() just makes the token list deterministic.
    tokens.extend(sorted(set(reserved.values())))

    t_BRACKET = r'\(|\)|\[|\]'
    t_ABBR = r'\.{1,2}|@|,|::'
    t_LITERAL = r'"([^"]*)"|\'([^\']*)\''
    # Regex alternation picks the first alternative that matches, not the
    # longest one, so the two-character operators must come before their
    # one-character prefixes; otherwise '//' lexes as two '/' tokens and
    # '<=' as '<' followed by '='.
    # NOTE(review): '*' is also matched by t_NAME below; PLY tries function
    # rules before string rules, so '*' presumably always comes out as NAME.
    t_OP = r'//|/|\||\+|\-|!=|<=|>=|=|<|>|\*'
    t_VAR = r'\$([a-zA-Z][\w\-]*)'

    # The first string in each rule function is its regex (read by PLY),
    # so rule functions are documented with comments, not docstrings.

    # Numbers: the lexeme is converted to float when it contains a '.',
    # otherwise to int.
    def t_NUM(t):
        r'\d+(?:\.\d*)?|\.\d+'
        convert = float if '.' in t.value else int
        t.value = convert(t.value)
        return t

    # Names (and '*'): reserved words get the type from ``reserved``,
    # everything else stays NAME.
    def t_NAME(t):
        r'\*|[a-zA-Z][\w\-]*'
        t.type = reserved.get(t.value, 'NAME')
        return t

    t_ignore = ' \t\n\r'

    # Report the offending character with up to 15 characters of context,
    # then skip it. Without the skip the lexer position does not advance
    # and PLY aborts with a LexError right after the message is printed.
    def t_error(t):
        print('Illegal character %r at %r' % (t.value[0], t.value[0:15]))
        t.lexer.skip(1)

    return lex.lex()
def main(path):
    """Tokenize *path* and print every token, one per line.

    Args:
        path: the expression text handed to the lexer built by
            ``expression()``.
    """
    expr = expression()
    expr.input(path)
    # token() returns None once the input is exhausted; iter() with a
    # sentinel stops the loop at that point.
    for tok in iter(expr.token, None):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(tok)
# Script entry point: require exactly one command-line argument (the
# expression to tokenize). usage() exits the process, so main() is only
# reached when the argument count is right.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
    main(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment