LDAP Filter parser
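
A parser for LDAP search filter strings (RFC 4515) built on funcparserlib: tokenize() lexes a filter such as (cn=Babs Jensen) into a token list, and parse() assembles the tokens into a tree following the ABNF grammar embedded below. The extensible-match branch of the grammar is listed but not implemented in the parser.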
# -*- coding: utf-8 -*-
import re

from funcparserlib.lexer import make_tokenizer
from funcparserlib.parser import some, skip, forward_decl, oneplus, many, maybe
_ABNF_GRAMMAR_DESCRIPTION = r"""
filter = LPAREN filtercomp RPAREN
filtercomp = and / or / not / item
and = AMPERSAND filterlist
or = VERTBAR filterlist
not = EXCLAMATION filter
filterlist = 1*filter
item = simple / present / substring / extensible
simple = attr filtertype assertionvalue
filtertype = equal / approx / greaterorequal / lessorequal
equal = EQUALS
approx = TILDE EQUALS
greaterorequal = RANGLE EQUALS
lessorequal = LANGLE EQUALS
extensible = ( attr [dnattrs] [matchingrule] COLON EQUALS assertionvalue ) / ( [dnattrs] matchingrule COLON EQUALS assertionvalue )
present = attr EQUALS ASTERISK
substring = attr EQUALS [initial] any [final]
initial = assertionvalue
any = ASTERISK *(assertionvalue ASTERISK)
final = assertionvalue
attr = attributedescription
attributedescription = attributetype options
attributetype = oid
oid = descr / numericoid
descr = keystring
keystring = leadkeychar *keychar
leadkeychar = ALPHA
keychar = ALPHA / DIGIT / HYPHEN
numericoid = number 1*( DOT number )
number = DIGIT / ( LDIGIT 1*DIGIT )
options = *( SEMI option )
option = 1*keychar
dnattrs = COLON "dn"
matchingrule = COLON oid
assertionvalue = valueencoding
valueencoding = 0*(normal / escaped)
normal = UTF1SUBSET / UTFMB
escaped = ESC HEX HEX
UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F
UTFMB = UTF2 / UTF3 / UTF4
UTF0 = %x80-BF
UTF1 = %x00-7F
UTF2 = %xC2-DF UTF0
UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC 2(UTF0) / %xED %x80-9F UTF0 / %xEE-EF 2(UTF0)
UTF4 = %xF0 %x90-BF 2(UTF0) / %xF1-F3 3(UTF0) / %xF4 %x80-8F 2(UTF0)
ESC = %x5C ; backslash ("\")
ALPHA = %x41-5A / %x61-7A ; "A"-"Z" / "a"-"z"
DIGIT = %x30 / LDIGIT ; "0"-"9"
LDIGIT = %x31-39 ; "1"-"9"
HEX = DIGIT / %x41-46 / %x61-66 ; "0"-"9" / "A"-"F" / "a"-"f"
EXCLAMATION = %x21 ; exclamation mark ("!")
AMPERSAND = %x26 ; ampersand (or AND symbol) ("&")
ASTERISK = %x2A ; asterisk ("*")
COLON = %x3A ; colon (":")
VERTBAR = %x7C ; vertical bar (or pipe) ("|")
TILDE = %x7E ; tilde ("~")
"""
def tokenize(s):
    """Tokenizer.

    >>> tokenize('(test=abc)')
    [Token('LParen', '('), Token('AttrName', 'test'), Token('Equal', '='), Token('AttrName', 'abc'), Token('RParen', ')')]

    Note that a value shaped like an attribute name ('abc') is emitted as
    an AttrName token; parse() accepts both token types as assertion values.
    """
    valueencodings = {
        # Unescaped value characters (UTF1SUBSET in the grammar above)
        'normal': r"""
            [\x01-\x27\x2B-\x5B\x5D-\x7F]+
        """,
        # A backslash followed by two hex digits ('escaped' in the grammar)
        'escaped': r"""
            \\
            [0-9A-Fa-f]{2}
        """,
    }
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('And', (r'\x26',)),
        ('Or', (r'\x7C',)),
        ('Not', (r'\x21',)),
        ('LParen', (r'\x28',)),
        ('RParen', (r'\x29',)),
        ('Asterisk', (r'\x2A',)),
        ('Equal', ('=',)),
        ('Approx', ('~=',)),
        ('Greaterorequal', ('>=',)),
        ('Lessorequal', ('<=',)),
        # An attribute name is either a descr or a numeric OID
        ('AttrName', (r"""[a-zA-Z][a-zA-Z0-9-]* | [0-9][0-9.]+[0-9]""", re.VERBOSE)),
        # '+' rather than '*': a zero-width match would make the tokenizer
        # loop forever on input it cannot consume
        ('AttrValue', (r'(%(normal)s | %(escaped)s)+' % valueencodings, re.VERBOSE)),
    ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(s) if x.type not in useless]

def parse(tokens):
    t = lambda s: some(lambda tok: tok.type == s)
    filter = forward_decl()
    filterlist = oneplus(filter)
    and_op = t('And') + filterlist
    or_op = t('Or') + filterlist
    not_op = t('Not') + filter
    lpar = skip(t('LParen'))
    rpar = skip(t('RParen'))
    attributedescription = t('AttrName')
    attr = attributedescription
    equal = t('Equal')
    approx = t('Approx')
    greaterorequal = t('Greaterorequal')
    lessorequal = t('Lessorequal')
    asterisk = t('Asterisk')
    filtertype = equal | approx | greaterorequal | lessorequal
    # A name-shaped value such as 'abc' is lexed as AttrName, so an
    # assertion value may arrive under either token type
    assertionvalue = t('AttrValue') | t('AttrName')
    simple = attr + filtertype + assertionvalue
    present = attr + equal + asterisk
    initial = assertionvalue
    final = assertionvalue
    # any = ASTERISK *(assertionvalue ASTERISK), per the grammar above
    any = asterisk + many(assertionvalue + asterisk)
    substring = attr + equal + maybe(initial) + any + maybe(final)
    # Try substring before simple: simple would otherwise swallow the
    # attr/equal/initial prefix of a substring filter, and the enclosing
    # parser cannot backtrack into another alternative once item succeeds.
    # A presence filter (attr=*) also matches the substring branch here.
    item = substring | present | simple
    filtercomp = and_op | or_op | not_op | item
    filter.define(lpar + filtercomp + rpar)
    return filter.parse(tokens)

if __name__ == '__main__':
    import doctest
    doctest.testmod()
    print(tokenize('(test=abc)'))
    print(parse(tokenize('(test=abc)')))
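
For a sense of how the two functions compose on a larger filter, here is a rough usage sketch (the exact parse tree is whatever nested tuple of Token objects the combinators above produce, so the shapes described in the comments are approximate):

tokens = tokenize('(&(objectClass=person)(cn=a*b))')
tree = parse(tokens)
# `tree` pairs the And token with a list of two sub-filter results:
# a simple equality match on objectClass and a substring match on cn.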