Created
April 8, 2012 04:52
-
-
Save asv/2334780 to your computer and use it in GitHub Desktop.
A simple RFC 4515 (LDAP search filter string representation) parser built with funcparserlib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import re | |
from funcparserlib.lexer import make_tokenizer | |
from funcparserlib.parser import some, skip, forward_decl, oneplus, maybe | |
# https://svn.process-one.net/tsung/trunk/src/lib/rfc4515_parser.erl | |
_ABNF_GRAMMAR_DESCRIPTION = r""" | |
filter = LPAREN filtercomp RPAREN | |
filtercomp = and / or / not / item | |
and = AMPERSAND filterlist | |
or = VERTBAR filterlist | |
not = EXCLAMATION filter | |
filterlist = 1*filter | |
item = simple / present / substring / extensible | |
simple = attr filtertype assertionvalue | |
filtertype = equal / approx / greaterorequal / lessorequal | |
equal = EQUALS | |
approx = TILDE EQUALS | |
greaterorequal = RANGLE EQUALS | |
lessorequal = LANGLE EQUALS | |
extensible = ( attr [dnattrs] [atchingrule] COLON EQUALS assertionvalue ) / ( [dnattrs] matchingrule COLON EQUALS assertionvalue ) | |
present = attr EQUALS ASTERISK | |
substring = attr EQUALS [initial] any [final] | |
initial = assertionvalue | |
any = ASTERISK *(assertionvalue ASTERISK) | |
final = assertionvalue | |
attr = attributedescription | |
attributedescription = attributetype options | |
attributetype = oid | |
oid = descr / numericoid | |
descr = keystring | |
keystring = leadkeychar *keychar | |
leadkeychar = ALPHA | |
keychar = ALPHA / DIGIT / HYPHEN | |
numericoid = number 1*( DOT number ) | |
number = DIGIT / ( LDIGIT 1*DIGIT ) | |
options = *( SEMI option ) | |
option = 1*keychar | |
dnattrs = COLON "dn" | |
matchingrule = COLON oid | |
assertionvalue = valueencoding | |
valueencoding = 0*(normal / escaped) | |
normal = UTF1SUBSET / UTFMB | |
escaped = ESC HEX HEX | |
UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F | |
UTFMB = UTF2 / UTF3 / UTF4 | |
UTF0 = %x80-BF | |
UTF1 = %x00-7F | |
UTF2 = %xC2-DF UTF0 | |
UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC 2(UTF0) / %xED %x80-9F UTF0 / %xEE-EF 2(UTF0) | |
UTF4 = %xF0 %x90-BF 2(UTF0) / %xF1-F3 3(UTF0) / %xF4 %x80-8F 2(UTF0) | |
ESC = %x5C ; backslash ("\") | |
ALPHA = %x41-5A / %x61-7A ; "A"-"Z" / "a"-"z" | |
DIGIT = %x30 / LDIGIT ; "0"-"9" | |
LDIGIT = %x31-39 ; "1"-"9" | |
HEX = DIGIT / %x41-46 / %x61-66 ; "0"-"9" / "A"-"F" / "a"-"f" | |
EXCLAMATION = %x21 ; exclamation mark ("!") | |
AMPERSAND = %x26 ; ampersand (or AND symbol) ("&") | |
ASTERISK = %x2A ; asterisk ("*") | |
COLON = %x3A ; colon (":") | |
VERTBAR = %x7C ; vertical bar (or pipe) ("|") | |
TILDE = %x7E ; tilde ("~") | |
""" | |
def tokenize(s):
    """Split an RFC 4515 filter string into a list of funcparserlib Tokens.

    Whitespace tokens are dropped from the result.

    >>> tokenize('(test=abc)')
    [Token('LParen', '('), Token('AttrName', 'test'), Token('Equal', '='), Token('AttrValue', 'abc'), Token('RParen', ')')]
    """
    # Fragments of the grammar's 'valueencoding' rule:
    #   normal  -- one or more chars excluding NUL, '(', ')', '*' and '\'
    #   escaped -- ESC HEX HEX, i.e. a backslash followed by two hex digits
    # (The original had these two labels swapped; the matched language is
    # unchanged because both alternatives are OR-ed together in AttrValue.)
    # 'ur' string prefixes were dropped: they are invalid in Python 3 and
    # the patterns are pure ASCII, so raw str literals are equivalent.
    valueencodings = {
        'normal': r"""
        [\x01-\x27\x2B-\x5B\x5D-\x7F]+
        """,
        'escaped': r"""
        \\
        [0-9A-Fa-f]{2}
        """,
    }
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('And', (r'\x26',)),            # '&'
        ('Or', (r'\x7C',)),             # '|'
        ('Not', (r'\x21',)),            # '!'
        ('LParen', (r'\x28',)),         # '('
        ('RParen', (r'\x29',)),         # ')'
        ('Asterisk', (r'\x2A',)),       # '*'
        ('Equal', ('=',)),
        ('Approx', ('~=',)),
        ('Greaterorequal', ('>=',)),
        ('Lessorequal', ('<=',)),
        # attr: descr (keystring) or numericoid per RFC 4515
        ('AttrName', (r"""[a-zA-Z][a-zA-Z0-9-]* | [0-9][0-9.]+[0-9]""", re.VERBOSE)),
        ('AttrValue', (r'(%(normal)s | %(escaped)s)*' % valueencodings, re.VERBOSE)),
    ]
    # Token types to discard (the stray literal 0 in the original list was
    # dead data -- token types are always strings).
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(s) if x.type not in useless]
def parse(tokens):
    """Parse a token list (as produced by tokenize()) into a parse tree.

    The combinator grammar mirrors RFC 4515:
    filter = LPAREN filtercomp RPAREN, where filtercomp is an AND/OR/NOT
    combination or a single match item.
    """
    def tok(kind):
        # Parser accepting exactly one token of the given type.
        return some(lambda candidate: candidate.type == kind)

    # Forward declaration: 'filter' is recursive via and/or/not.
    ldap_filter = forward_decl()

    # Composite filters: '&'/'|' take one-or-more sub-filters, '!' takes one.
    subfilters = oneplus(ldap_filter)
    conjunction = tok('And') + subfilters
    disjunction = tok('Or') + subfilters
    negation = tok('Not') + ldap_filter

    # Building blocks for match items.
    attribute = tok('AttrName')
    eq = tok('Equal')
    star = tok('Asterisk')
    value = tok('AttrValue')
    comparator = eq | tok('Approx') | tok('Greaterorequal') | tok('Lessorequal')

    simple_match = attribute + comparator + value       # attr <op> value
    presence = attribute + eq + star                    # attr=*
    # NOTE(review): RFC 4515 'any' is ASTERISK *(assertionvalue ASTERISK);
    # this form only allows one optional trailing value-or-star, so
    # multi-'*' substrings may not parse -- confirm intended coverage.
    wildcard_core = star + maybe(value | star)
    substring_match = attribute + eq + maybe(value) + wildcard_core + maybe(value)

    match_item = simple_match | presence | substring_match
    body = conjunction | disjunction | negation | match_item
    ldap_filter.define(skip(tok('LParen')) + body + skip(tok('RParen')))
    return ldap_filter.parse(tokens)
if __name__ == '__main__':
    # Smoke test moved under the __main__ guard so importing this module
    # has no side effects (the original printed on import).
    print(tokenize('(test=abc)'))
    # TODO: exercise parse(tokenize(...)) once substring handling is verified.

    import doctest
    doctest.testmod(verbose=True, raise_on_error=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment