@asv
Created March 16, 2012 07:47
LDAP filter parser: a funcparserlib-based tokenizer and parser for RFC 4515 search filter strings
# -*- coding: utf-8 -*-
import re
from funcparserlib.lexer import make_tokenizer
from funcparserlib.parser import (some, skip, forward_decl, oneplus, many,
                                  maybe, finished)
_ABNF_GRAMMAR_DESCRIPTION = r"""
filter = LPAREN filtercomp RPAREN
filtercomp = and / or / not / item
and = AMPERSAND filterlist
or = VERTBAR filterlist
not = EXCLAMATION filter
filterlist = 1*filter
item = simple / present / substring / extensible
simple = attr filtertype assertionvalue
filtertype = equal / approx / greaterorequal / lessorequal
equal = EQUALS
approx = TILDE EQUALS
greaterorequal = RANGLE EQUALS
lessorequal = LANGLE EQUALS
extensible = ( attr [dnattrs] [matchingrule] COLON EQUALS assertionvalue ) / ( [dnattrs] matchingrule COLON EQUALS assertionvalue )
present = attr EQUALS ASTERISK
substring = attr EQUALS [initial] any [final]
initial = assertionvalue
any = ASTERISK *(assertionvalue ASTERISK)
final = assertionvalue
attr = attributedescription
attributedescription = attributetype options
attributetype = oid
oid = descr / numericoid
descr = keystring
keystring = leadkeychar *keychar
leadkeychar = ALPHA
keychar = ALPHA / DIGIT / HYPHEN
numericoid = number 1*( DOT number )
number = DIGIT / ( LDIGIT 1*DIGIT )
options = *( SEMI option )
option = 1*keychar
dnattrs = COLON "dn"
matchingrule = COLON oid
assertionvalue = valueencoding
valueencoding = 0*(normal / escaped)
normal = UTF1SUBSET / UTFMB
escaped = ESC HEX HEX
UTF1SUBSET = %x01-27 / %x2B-5B / %x5D-7F
UTFMB = UTF2 / UTF3 / UTF4
UTF0 = %x80-BF
UTF1 = %x00-7F
UTF2 = %xC2-DF UTF0
UTF3 = %xE0 %xA0-BF UTF0 / %xE1-EC 2(UTF0) / %xED %x80-9F UTF0 / %xEE-EF 2(UTF0)
UTF4 = %xF0 %x90-BF 2(UTF0) / %xF1-F3 3(UTF0) / %xF4 %x80-8F 2(UTF0)
ESC = %x5C ; backslash ("\")
ALPHA = %x41-5A / %x61-7A ; "A"-"Z" / "a"-"z"
DIGIT = %x30 / LDIGIT ; "0"-"9"
LDIGIT = %x31-39 ; "1"-"9"
HEX = DIGIT / %x41-46 / %x61-66 ; "0"-"9" / "A"-"F" / "a"-"f"
EXCLAMATION = %x21 ; exclamation mark ("!")
AMPERSAND = %x26 ; ampersand (or AND symbol) ("&")
LPAREN = %x28 ; left parenthesis ("(")
RPAREN = %x29 ; right parenthesis (")")
ASTERISK = %x2A ; asterisk ("*")
HYPHEN = %x2D ; hyphen ("-")
DOT = %x2E ; period (".")
COLON = %x3A ; colon (":")
SEMI = %x3B ; semicolon (";")
LANGLE = %x3C ; left angle bracket ("<")
EQUALS = %x3D ; equals sign ("=")
RANGLE = %x3E ; right angle bracket (">")
VERTBAR = %x7C ; vertical bar (or pipe) ("|")
TILDE = %x7E ; tilde ("~")
"""
def tokenize(s):
    """Split an LDAP filter string into a list of tokens.

    Note that values which look like attribute names come out as AttrName
    tokens; the parser below accepts both where `assertionvalue` is expected.

    >>> tokenize('(test=abc)')
    [Token('LParen', '('), Token('AttrName', 'test'), Token('Equal', '='), Token('AttrName', 'abc'), Token('RParen', ')')]
    """
    # The two branches of the grammar's `valueencoding`: `normal` is the
    # UTF1SUBSET character class, `escaped` is ESC HEX HEX.
    valueencodings = {
        'normal': r'[\x01-\x27\x2B-\x5B\x5D-\x7F]+',
        'escaped': r'\\[0-9A-Fa-f]{2}',
    }
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('And', (r'\x26',)),             # "&"
        ('Or', (r'\x7C',)),              # "|"
        ('Not', (r'\x21',)),             # "!"
        ('LParen', (r'\x28',)),          # "("
        ('RParen', (r'\x29',)),          # ")"
        ('Asterisk', (r'\x2A',)),        # "*"
        ('Equal', ('=',)),
        ('Approx', ('~=',)),
        ('Greaterorequal', ('>=',)),
        ('Lessorequal', ('<=',)),
        # descr or numericoid (see `attributedescription` in the grammar).
        ('AttrName', (r'[a-zA-Z][a-zA-Z0-9-]* | [0-9]+(\.[0-9]+)+', re.VERBOSE)),
        # `+` rather than the grammar's `0*`: a zero-width match would make
        # the tokenizer loop forever on characters no other spec covers.
        ('AttrValue', (r'(%(normal)s | %(escaped)s)+' % valueencodings,
                       re.VERBOSE)),
    ]
    useless = ['Space']
    t = make_tokenizer(specs)
    return [x for x in t(s) if x.type not in useless]
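
# Editor's sketch (not part of the original gist): the inverse of the
# grammar's `escaped = ESC HEX HEX` rule.  RFC 4515 requires NUL, "(", ")",
# "*" and "\" inside an assertion value to be sent as a backslash followed
# by two hex digits, e.g. "*" -> "\2a".
def escape_assertion_value(value):
    special = {'\0': r'\00', '(': r'\28', ')': r'\29',
               '*': r'\2a', '\\': r'\5c'}
    return ''.join(special.get(c, c) for c in value)
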
def parse(tokens):
    """Build a parse tree from a token list produced by `tokenize`.

    The combinators mirror the ABNF above.  No semantic actions are attached,
    so the result is funcparserlib's raw nested-tuple tree.  The `extensible`
    production is present in the grammar but not implemented here.
    """
    t = lambda s: some(lambda tok: tok.type == s)

    filter = forward_decl()
    filterlist = oneplus(filter)
    and_op = t('And') + filterlist
    or_op = t('Or') + filterlist
    not_op = t('Not') + filter
    lpar = skip(t('LParen'))
    rpar = skip(t('RParen'))
    attributedescription = t('AttrName')
    attr = attributedescription
    equal = t('Equal')
    approx = t('Approx')
    greaterorequal = t('Greaterorequal')
    lessorequal = t('Lessorequal')
    asterisk = t('Asterisk')
    filtertype = equal | approx | greaterorequal | lessorequal
    # Values that look like attribute names lex as AttrName, so accept both
    # token types where the grammar says `assertionvalue`.
    assertionvalue = t('AttrValue') | t('AttrName')
    simple = attr + filtertype + assertionvalue
    present = attr + equal + asterisk
    initial = assertionvalue
    final = assertionvalue
    # any = ASTERISK *(assertionvalue ASTERISK)
    any = asterisk + many(assertionvalue + asterisk)
    substring = attr + equal + maybe(initial) + any + maybe(final)
    # Alternation commits to the first branch that succeeds, so try the most
    # specific form first: `simple` would otherwise swallow the prefix of a
    # substring filter like (cn=a*b).  A bare (attr=*) now parses as a
    # substring with empty parts; `present` is kept for parity with the
    # grammar.
    item = substring | present | simple
    filtercomp = and_op | or_op | not_op | item
    filter.define(lpar + filtercomp + rpar)
    # Require the whole token stream to be consumed.
    return (filter + skip(finished)).parse(tokens)
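
# Convenience wrapper -- an editor's addition, not part of the original gist:
# lex and parse in one call.  Raises funcparserlib's LexerError or
# NoParseError on malformed input.
def parse_filter(s):
    return parse(tokenize(s))
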
if __name__ == '__main__':
    import doctest
    doctest.testmod()
    print (tokenize('(test=abc)'))
    print (parse(tokenize('(test=abc)')))
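    # Extra smoke tests (editor's additions, filter shapes after RFC 4515's
    # examples): a composite AND + substring filter, and an OR with negation.
    print (parse_filter('(&(objectClass=person)(cn=J*Smith))'))
    print (parse_filter('(|(cn=abc)(!(sn<=Smith)))'))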