Skip to content

Instantly share code, notes, and snippets.

@niklasl
Created January 26, 2023 21:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niklasl/cc5df8d5500a3e813e2bcf1a48260213 to your computer and use it in GitHub Desktop.
"""
A simple query language parser experiment.
"""
OPERATORS: dict[str, tuple[str | None, int, str | int]] = {
'and': ('AND', -1, 'AND'),
'or': ('OR', -1, 'AND'),
'not': ('NOT', 0, 1),
'^': ('INV', 0, 1),
'=': ('EQ', -1, 1),
'!=': ('NEQ', -1, 1),
'=~': ('MATCH', 0, 1),
}
for alias, key in [('&&', 'and'), ('||', 'or')]:
OPERATORS[alias] = OPERATORS[key]
SPECIALS = {key for key in OPERATORS if len(key) == 1}
def tokenize(s: str, at=0) -> tuple[list, int]:
    """Lex *s* into a (possibly nested) list of string tokens.

    Parentheses recurse into sub-lists; double quotes group characters
    (including whitespace and specials) into one word; backslash escapes
    the next character. Returns (tokens, position-after-consumed-input).

    Fix: single-character specials ('=', '^') used to be emitted on their
    own unconditionally, so the two-character operators '!=' and '=~'
    declared in OPERATORS could never appear as tokens ('a != b' lexed as
    'a', '!', '=', 'b'). They are now merged into one token.
    """
    tokens: list = []
    word: list[str] = []
    quote_start = False
    escaped = False
    while at < len(s):
        c = s[at]
        at += 1
        if escaped:
            # Previous char was a backslash: take this char literally.
            escaped = False
            word.append(c)
            continue
        if c == '\\':
            escaped = True
            continue
        if quote_start and c != '"':
            # Inside quotes everything (spaces, specials, parens) is literal.
            word.append(c)
            continue
        if c == '"':
            # NOTE(review): an empty quoted string ("") yields no token at
            # all, because _add drops empty buffers — confirm if intended.
            quote_start = not quote_start
            continue
        if c == '(':
            item, at = tokenize(s, at)
            tokens.append(item)
            continue
        if c == ')':
            _add(tokens, word)
            return tokens, at
        if c in SPECIALS:
            if word and word[-1] + c in OPERATORS:
                # e.g. '!' already buffered, now '=' arrives -> '!=' token.
                op = word.pop() + c
                _add(tokens, word)
                word = []
                _add(tokens, [op])
            elif at < len(s) and c + s[at] in OPERATORS:
                # e.g. '=' now, '~' next -> '=~' token (consume lookahead).
                _add(tokens, word)
                word = []
                _add(tokens, [c + s[at]])
                at += 1
            else:
                _add(tokens, word)
                word = []
                _add(tokens, [c])
        elif c in {' ', '\n', '\r', '\t'}:
            _add(tokens, word)
            word = []
        else:
            word.append(c)
    _add(tokens, word)
    return tokens, at
def _add(tokens: list, word: list[str]):
if word:
tokens.append(''.join(word) if isinstance(word, list) else word)
def parse(tokens: list, slurp_until: str | int = 0) -> list:
    """Build a nested operator tree from the output of tokenize().

    Args:
        tokens: flat/nested token lists; consumed destructively (popped
            from the front, including nested sub-lists).
        slurp_until: stop condition for a sub-parse — a sentinel token
            (str) or a count of collected items (int; 0 means unbounded).

    Returns:
        A list of terms and {OPERATOR: operands} dicts.

    Raises:
        ValueError: for an item that is neither a list nor a str.
    """
    tree: list[object] = []
    item = None
    while tokens:
        item = tokens.pop(0)
        if isinstance(item, list):
            # Parenthesized group: parse recursively on its own.
            tree.append(parse(item))
        elif isinstance(item, str):
            operator, prev, next_slurp_until = _get_operator(item)
            if operator is not None:
                collected = parse(tokens, next_slurp_until)
                if prev == -1:
                    # Binary operator: the left operand is the *most recent*
                    # preceding term. Was tree.pop(0), which grabbed the
                    # oldest term and mis-parsed inputs like "a = b c = d".
                    collected.insert(0, tree.pop())
                tree.append({operator: collected})
                if isinstance(next_slurp_until, str):
                    # Infix-chain operators own the rest of this level.
                    return tree
            else:
                tree.append(item)
        else:
            raise ValueError(f"Unknown item: {item}")
        if slurp_until == item:
            break
        if isinstance(slurp_until, int) and len(tree) == slurp_until:
            break
    return tree
def _get_operator(word: str) -> tuple[str | None, int, str | int]:
    """Case-insensitive OPERATORS lookup; (None, 0, 0) when *word* is not an operator."""
    try:
        return OPERATORS[word.lower()]
    except KeyError:
        return (None, 0, 0)
if __name__ == '__main__':
    import json

    # Demo query exercising quoting, escapes, grouping and all operators.
    q = r"""
a = "A A A" and (b = "B B" or c = "C = \"C\" or B != \\n")
and (not d or ^e)
and (
not (
(f and not (g or h))
or i
)
)
"""
    print(q)

    token_tree, _pos = tokenize(q)
    print(token_tree)

    # parse() consumes its input, so hand it a copy of the token list.
    result = parse(token_tree[:])
    print(json.dumps(result, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment