Parsing OBAN, the Obviously Bogus Abject Notation
(* Copyright 2020 Derrick W. Turk / terminus data science, LLC | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. *) | |
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; | |
number = digit, { digit }; | |
any_character = ? any UTF-8 code point ? | |
string = "<<", { (any_character - ">") | ("^", any_character) }, ">>" | |
triboolean = "True" | "False" | "FileNotFound"; | |
expression = number | string | triboolean | congregation | callout | |
congregation = "(", [ expression, { ",", expression } ], ")" | |
callout = "{", [ string, "!", expression, { "&", string, "!", expression } ], "}" |
# Copyright 2020 Derrick W. Turk / terminus data science, LLC | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import sys | |
from enum import Enum, auto | |
from typing import Any, Callable, Dict, List, NamedTuple, Tuple, TypeVar, Union | |
# data types we need for OBAN: | |
class TriBool(Enum): | |
FALSE = auto() | |
TRUE = auto() | |
FILENOTFOUND = auto() | |
# there's no way to write a recursive type alias: | |
# we'd prefer | |
# Expression = Union[int, str, TriBool, List[Expression], Dict[str, Expression]] | |
# but we can't, so we'll wrap it in a class | |
class Expression: | |
# type alias for what we can wrap into an expression | |
Wrappable = Union[ | |
int, | |
str, | |
TriBool, | |
List['Expression'], | |
Dict[str, 'Expression'] | |
] | |
def __init__(self, value: Wrappable): | |
self.value = value | |
def __repr__(self): | |
return f'Expression({repr(self.value)})' | |
# here's how we'd represent: | |
# { <<first>> ! (1, FileNotFound) | |
# & <<second>> ! <<some <<text^>^>>> | |
# & <<third>> ! { <<nested>> ! True } | |
# } | |
example: Expression | |
example = Expression({ | |
'first': Expression([Expression(1), Expression(TriBool.FILENOTFOUND)]), | |
'second': Expression('some <<text>>'), | |
'third': Expression({ 'nested': Expression(TriBool.TRUE) }) | |
}) | |
# now, a parser | |
_T = TypeVar('_T') | |
ParseResult = Union[Tuple[_T, str], None] | |
Parser = Callable[[str], ParseResult[_T]] | |
# a parser which can't fail | |
InfallibleParser = Callable[[str], Tuple[_T, str]] | |
# some tools for building 'primitive' parsers | |
# the simplest parser: 'match exactly this string' | |
def exactly(match: str) -> Parser[str]: | |
def parser(input: str) -> ParseResult[str]: | |
if input.startswith(match): | |
return match, input[len(match):] | |
return None | |
return parser | |
# another basic one: succeeds at end of input; fails otherwise | |
# eof: Parser[None] | |
eof: Parser[None] | |
eof = lambda input: (None, input) if len(input) == 0 else None | |
# a parser which consumes a single character, if available | |
char: Parser[str] | |
char = lambda input: (input[0], input[1:]) if len(input) > 0 else None | |
# an 'infallible' parser which consumes characters matching a predicate | |
def chars_while(p: Callable[[str], bool]) -> InfallibleParser[str]: | |
def parser(input: str) -> Tuple[str, str]: | |
split_at = 0 | |
while split_at < len(input) and p(input[split_at]): | |
split_at += 1 | |
return input[:split_at], input[split_at:] | |
return parser | |
# use it to build a "consume whitespace" parser | |
whitespace: Parser[str] | |
whitespace = chars_while(str.isspace) | |
# some basic combinators for parsers | |
_U = TypeVar('_U') | |
# map a function over a parser's result, if it succeeds | |
def pmap(f: Callable[[_T], _U], p: Parser[_T]) -> Parser[_U]: | |
def parser(input: str) -> ParseResult[_U]: | |
res = p(input) | |
if res is None: | |
return None | |
x, rest = res | |
return f(x), rest | |
return parser | |
# a parser which always succeeds by producing a constant | |
def pure(x: _T) -> InfallibleParser[_T]: | |
return lambda input: (x, input) | |
# the opposite - a parser which always fails | |
# it's really a Parser[_T] for all _T, but we'll use Any since mypy | |
# isn't that sharp | |
fail: Parser[Any] | |
fail = lambda _: None | |
# replace a successful result by a constant | |
def cmap(x: _T, p: Parser[_U]) -> Parser[_T]: | |
return pmap(lambda _: x, p) | |
# these are harder to type in Python - we can do them for the fixed-arity case, | |
# but it's just too damn convenient to let these take *args | |
# chain multiple parsers together and produce a tuple of their results, | |
# if all succeed | |
def chain(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]: | |
def parser(input: str) -> ParseResult[Tuple[Any, ...]]: | |
results = list() | |
for p in parsers: | |
res = p(input) | |
if res is None: | |
return None | |
val, input = res | |
results.append(val) | |
return tuple(results), input | |
return parser | |
# run multiple parsers, recovering from errors, until one succeeds; | |
# fail if all parsers fail | |
def alt(*parsers: Parser[Any]) -> Parser[Any]: | |
def parser(input: str) -> ParseResult[Any]: | |
for p in parsers: | |
res = p(input) | |
if res is not None: | |
return res | |
return None | |
return parser | |
# capture zero or more repetitions of a parser | |
def many(p: Parser[_T]) -> InfallibleParser[List[_T]]: | |
def parser(input: str) -> Tuple[List[_T], str]: | |
results: List[_T] = list() | |
while True: | |
res = p(input) | |
if res is None: | |
return results, input | |
val, input = res | |
results.append(val) | |
return parser | |
# we can use alt to define an "optional" parser combinator, which allows but does | |
# not require its base parser to match | |
def optional(p: Parser[_T]) -> Parser[Union[_T, None]]: | |
return alt(p, pure(None)) | |
# finally, a way to "decide" the next parser based on | |
# the result of the previous - this is what makes Parser a monad | |
def bind(p: Parser[_T], f: Callable[[_T], Parser[_U]]) -> Parser[_U]: | |
def parser(input: str) -> ParseResult[_U]: | |
res = p(input) | |
if res is None: | |
return None | |
x, input = res | |
return f(x)(input) | |
return parser | |
# we can use bind to build some interesting extensions to previous | |
# parsers | |
# like chars_while, but requires there to be at least one matching char | |
def chars_while1(p: Callable[[str], bool]) -> Parser[str]: | |
return bind(chars_while(p), lambda c: fail if len(c) == 0 else pure(c)) | |
# use it to build a "run of digits" parser | |
digits: Parser[str] | |
digits = chars_while1(str.isdigit) | |
# use it to build an "unsigned integer" parser | |
integer: Parser[int] | |
integer = pmap(int, digits) | |
# capture one or more repetitions of a parser | |
def some(p: Parser[_T]) -> Parser[List[_T]]: | |
return bind(many(p), lambda r: fail if len(r) == 0 else pure(r)) | |
# now we can start building some more interesting combinators | |
# mypy hates type-checking lambdas as arguments to pmap, so we'll need some | |
# tuple-extractor functions | |
def first(t: Tuple[Any, ...]) -> Any: | |
return t[0] | |
def second(t: Tuple[Any, ...]) -> Any: | |
return t[1] | |
# "only" p - match what p matches, then insist on end-of-input | |
def only(p: Parser[_T]) -> Parser[_T]: | |
return pmap(first, chain(p, eof)) | |
# match p, followed by optional whitespace | |
def lexeme(p: Parser[_T]) -> Parser[_T]: | |
return pmap(first, chain(p, whitespace)) | |
# match zero or more p, separated by sep | |
def sep_by(p: Parser[_T], sep: Parser[_U]) -> Parser[List[_T]]: | |
# mypy hates this thing's type | |
# it should be: | |
# def extract(t: Union[Tuple[_T, List[Tuple[_U, _T]]], None]) -> List[_T]: | |
# but that fails inference when we use it as an argument to pmap | |
def extract(t): | |
if t is None: | |
return [] | |
return [t[0]] + [u[1] for u in t[1]] | |
return pmap( | |
extract, | |
optional(chain(p, many(chain(sep, p))))) | |
# now we can write our recursive descent parser for OBAN | |
# we already have `integer` | |
tribool: Parser[TriBool] | |
tribool = alt( | |
cmap(TriBool.FALSE, exactly('False')), | |
cmap(TriBool.TRUE, exactly('True')), | |
cmap(TriBool.FILENOTFOUND, exactly('FileNotFound')) | |
) | |
# again, mypy isn't smart enough to follow the type of this "lambda" | |
def _string_extract(t): | |
_1, parts, _2 = t | |
return ''.join(p[1] if isinstance(p, tuple) else p for p in parts) | |
string: Parser[str] | |
string = pmap( | |
_string_extract, | |
chain( | |
exactly('<<'), | |
many(alt( | |
chars_while1(lambda c: c != '>' and c != '^'), | |
chain(exactly('^'), char) | |
)), | |
exactly('>>') | |
)) | |
# there's a circular dependency between congregation, callout, and expression | |
# we'll break it in expression, by making it a "proper" function | |
def expression(input: str) -> ParseResult[Expression]: | |
parser = pmap( | |
Expression, | |
lexeme(alt(integer, tribool, string, congregation, callout))) | |
return parser(input) | |
# in a lazier world, this would work: | |
# expression: Parser[Expression] | |
# expression = pmap( | |
# Expression, | |
# lexeme(alt(integer, tribool, string, congregation, callout))) | |
congregation: Parser[List[Expression]] | |
congregation = pmap( | |
second, | |
chain( | |
lexeme(exactly('(')), | |
sep_by(expression, lexeme(exactly(','))), | |
lexeme(exactly(')')) | |
)) | |
def _callout_extract(t): | |
return { kv[0]: kv[2] for kv in t[1] } | |
callout: Parser[Dict[str, Expression]] | |
callout = pmap( | |
_callout_extract, | |
chain( | |
lexeme(exactly('{')), | |
sep_by(chain( | |
lexeme(string), | |
lexeme(exactly('!')), | |
expression | |
), | |
lexeme(exactly('&'))), | |
lexeme(exactly('}')) | |
)) | |
# since `lexeme` skips trailing whitespace, we need to allow leading whitespace | |
# before a document explicitly, then ensure end-of-input | |
document: Parser[Expression] | |
document = pmap(second, chain(whitespace, only(expression))) | |
def main(argv: List[str]) -> int: | |
for line in sys.stdin: | |
res = document(line) | |
if res is None: | |
print('> parse error', file=sys.stderr) | |
else: | |
x, _ = res | |
print(f'> {x}') | |
return 0 | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv)) |
# Copyright 2020 Derrick W. Turk / terminus data science, LLC | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import sys | |
from enum import Enum, auto | |
from typing import Any, Callable, Dict, List, NamedTuple, Tuple, TypeVar, Union | |
# OBAN AST definitions | |
class TriBool(Enum): | |
FALSE = auto() | |
TRUE = auto() | |
FILENOTFOUND = auto() | |
class Expression: | |
Wrappable = Union[ | |
int, | |
str, | |
TriBool, | |
List['Expression'], | |
Dict[str, 'Expression'] | |
] | |
def __init__(self, value: Wrappable): | |
self.value = value | |
def __repr__(self): | |
return f'Expression({repr(self.value)})' | |
# this time, we'll parse into a monad with state, context, | |
# and a richer notion of failure | |
class ParserState(NamedTuple): | |
pos: int = 0 | |
# methods for producing updated states | |
def advance(self, chars: int) -> 'ParserState': | |
return self._replace(pos = self.pos + chars) | |
# is the parser at the end of input? | |
def is_eof(self, input: str) -> bool: | |
return self.pos >= len(input) | |
# examine the next n chars from the input, starting at pos | |
def slice(self, input: str, chars: int) -> str: | |
return input[self.pos : self.pos + chars] | |
# fail at the current pos with a given 'expected' message | |
def fail(self, expected: str, committed: bool = False) -> 'ParseError': | |
return ParseError(self.pos, expected, committed) | |
class ParseError(NamedTuple): | |
pos: int | |
expected: str | |
committed: bool = False # is this error "un-backtrackable"? see `alt`. | |
_T = TypeVar('_T') | |
ParseResult = Union[Tuple[_T, ParserState], ParseError] | |
Parser = Callable[[str, ParserState], ParseResult[_T]] | |
# a parser which can't fail | |
InfallibleParser = Callable[[str, ParserState], Tuple[_T, ParserState]] | |
def exactly(match: str) -> Parser[str]: | |
def parser(input: str, state: ParserState) -> ParseResult[str]: | |
if state.slice(input, len(match)) == match: | |
return match, state.advance(len(match)) | |
return state.fail(f'"{match}"') | |
return parser | |
eof: Parser[None] | |
eof = lambda input, state: ( | |
(None, state) if state.is_eof(input) | |
else state.fail('end of input') | |
) | |
char: Parser[str] | |
char = lambda input, state: ( | |
(state.slice(input, 1), state.advance(1)) if not state.is_eof(input) | |
else state.fail('any character') | |
) | |
# we could build this with char, but we'll write it more efficiently by | |
# directly inspecting the input one character at a time | |
def chars_while(p: Callable[[str], bool]) -> InfallibleParser[str]: | |
def parser(input: str, state: ParserState) -> Tuple[str, ParserState]: | |
chars: int = 0 | |
while state.pos + chars < len(input) and p(input[state.pos + chars]): | |
chars += 1 | |
return state.slice(input, chars), state.advance(chars) | |
return parser | |
whitespace: Parser[str] | |
whitespace = chars_while(str.isspace) | |
# combinators work like before - but there will be a little more plumbing | |
# in the error case | |
_U = TypeVar('_U') | |
# map a function over a parser's result, if it succeeds | |
def pmap(f: Callable[[_T], _U], p: Parser[_T]) -> Parser[_U]: | |
def parser(input: str, state: ParserState) -> ParseResult[_U]: | |
res = p(input, state) | |
if isinstance(res, ParseError): | |
return res | |
x, new_state = res | |
return f(x), new_state | |
return parser | |
# a parser which always succeeds by producing a constant | |
def pure(x: _T) -> InfallibleParser[_T]: | |
return lambda _, state: (x, state) | |
# the opposite - a parser which always fails (with a given "expected" message) | |
def fail(expected: str) -> Parser[_T]: | |
return lambda _, state: state.fail(expected) | |
# replace a successful result by a constant | |
def cmap(x: _T, p: Parser[_U]) -> Parser[_T]: | |
return pmap(lambda _: x, p) | |
# these are harder to type in Python - we can do them for the fixed-arity case, | |
# but it's just too convenient to let these take *args | |
# chain multiple parsers together and produce a tuple of their results, | |
# if all succeed | |
def chain(*parsers: Parser[Any]) -> Parser[Tuple[Any, ...]]: | |
def parser(input: str, state: ParserState) -> ParseResult[Tuple[Any, ...]]: | |
results = list() | |
for p in parsers: | |
res = p(input, state) | |
if isinstance(res, ParseError): | |
return res | |
val, state = res | |
results.append(val) | |
return tuple(results), state | |
return parser | |
# run multiple parsers, recovering from errors, until one succeeds; | |
# fail if all parsers fail | |
# this parser introduces a new issue: "unbacktrackable" errors | |
# we usually want `alt` to backtrack when a sub-parser fails, so that it can | |
# try the next parser in the sequence without consuming any input. | |
# however, sometimes a sub-parser "knows" that the expected grammar is locked | |
# in, and we want `alt` to fail if that parser fails. | |
# consider parsing Python's `x if y else z`: once we see `if`, we know that | |
# we're inside a conditional expression - if we get a subsequent parse error, | |
# we would not want to go back and try parsing the whole thing as, say, a list | |
# comprehension! | |
def alt(*parsers: Parser[Any]) -> Parser[Any]: | |
def parser(input: str, state: ParserState) -> ParseResult[Any]: | |
expected: List[str] = list() | |
for p in parsers: | |
res = p(input, state) | |
if not isinstance(res, ParseError): | |
return res | |
if res.committed: | |
return res # these errors can't be backtracked | |
expected.append(res.expected) | |
# take a crack at an "expected" message by combining sub-parser messages | |
return state.fail(', or '.join(expected)) | |
return parser | |
# run a parser, but make any errors "committed"/"unbacktrackable" | |
def commit(p: Parser[_T]) -> Parser[_T]: | |
def parser(input: str, state: ParserState) -> ParseResult: | |
res = p(input, state) | |
if isinstance(res, ParseError): | |
return res._replace(committed=True) | |
return res | |
return parser | |
# capture zero or more repetitions of a parser | |
# the end-of-loop condition is a form of backtracking, so we need to check | |
# for a committed error | |
def many(p: Parser[_T]) -> Parser[List[_T]]: | |
def parser(input: str, state: ParserState) -> ParseResult[List[_T]]: | |
results: List[_T] = list() | |
while True: | |
res = p(input, state) | |
if isinstance(res, ParseError): | |
if res.committed: | |
return res | |
return results, state | |
val, state = res | |
results.append(val) | |
return parser | |
# we can use alt to define an "optional" parser combinator, which allows but does | |
# not require its base parser to match | |
def optional(p: Parser[_T]) -> Parser[Union[_T, None]]: | |
return alt(p, pure(None)) | |
# finally, a way to "decide" the next parser based on | |
# the result of the previous - this is what makes Parser a monad | |
def bind(p: Parser[_T], f: Callable[[_T], Parser[_U]]) -> Parser[_U]: | |
def parser(input: str, state: ParserState) -> ParseResult[_U]: | |
res = p(input, state) | |
if isinstance(res, ParseError): | |
return res | |
x, state = res | |
return f(x)(input, state) | |
return parser | |
# we can use bind to build some interesting extensions to previous | |
# parsers | |
# like chars_while, but requires there to be at least one matching char | |
# rather than "guess" an expected message, take it as an argument | |
def chars_while1(p: Callable[[str], bool], expected: str) -> Parser[str]: | |
return bind(chars_while(p), lambda c: ( | |
pure(c) if len(c) > 0 else fail(expected) | |
)) | |
# use it to build a "run of digits" parser | |
digits: Parser[str] | |
digits = chars_while1(str.isdigit, 'digits') | |
# use it to build an "unsigned integer" parser | |
integer: Parser[int] | |
integer = pmap(int, digits) | |
# capture one or more repetitions of a parser | |
def some(p: Parser[_T], expected: str) -> Parser[List[_T]]: | |
return bind(many(p), lambda c: ( | |
pure(c) if len(c) > 0 else fail(expected) | |
)) | |
# now we can start building some more interesting combinators | |
# mypy hates type-checking lambdas as arguments to pmap, so we'll need some | |
# tuple-extractor functions | |
def first(t: Tuple[Any, ...]) -> Any: | |
return t[0] | |
def second(t: Tuple[Any, ...]) -> Any: | |
return t[1] | |
# "only" p - match what p matches, then insist on end-of-input | |
def only(p: Parser[_T]) -> Parser[_T]: | |
return pmap(first, chain(p, eof)) | |
# match p, followed by optional whitespace | |
def lexeme(p: Parser[_T]) -> Parser[_T]: | |
return pmap(first, chain(p, whitespace)) | |
# match zero or more p, separated by sep | |
def sep_by(p: Parser[_T], sep: Parser[_U]) -> Parser[List[_T]]: | |
# mypy hates this thing's type | |
# it should be: | |
# def extract(t: Union[Tuple[_T, List[Tuple[_U, _T]]], None]) -> List[_T]: | |
# but that fails inference when we use it as an argument to pmap | |
def extract(t): | |
if t is None: | |
return [] | |
return [t[0]] + [u[1] for u in t[1]] | |
return pmap( | |
extract, | |
optional(chain(p, many(chain(sep, commit(p)))))) | |
# now we can write our recursive descent parser for OBAN | |
# we already have `integer` | |
tribool: Parser[TriBool] | |
tribool = alt( | |
cmap(TriBool.FALSE, exactly('False')), | |
cmap(TriBool.TRUE, exactly('True')), | |
cmap(TriBool.FILENOTFOUND, exactly('FileNotFound')) | |
) | |
# again, mypy isn't smart enough to follow the type of this "lambda" | |
def _string_extract(t): | |
_1, (parts, _2) = t | |
return ''.join(p[1] if isinstance(p, tuple) else p for p in parts) | |
string: Parser[str] | |
string = pmap( | |
_string_extract, | |
chain( | |
exactly('<<'), | |
commit(chain( | |
many(alt( | |
chars_while1(lambda c: c != '>' and c != '^', | |
'non-escape characters'), | |
chain(exactly('^'), char) | |
)), | |
exactly('>>') | |
)) | |
)) | |
# there's a circular dependency between congregation, callout, and expression | |
# we'll break it in expression, by making it a "proper" function | |
def expression(input: str, state: ParserState) -> ParseResult[Expression]: | |
parser = pmap( | |
Expression, | |
lexeme(alt(integer, tribool, string, congregation, callout))) | |
return parser(input, state) | |
# in a lazier world, this would work: | |
# expression: Parser[Expression] | |
# expression = pmap( | |
# Expression, | |
# lexeme(alt(integer, tribool, string, congregation, callout))) | |
def _congregation_extract(t): | |
return t[1][0] | |
congregation: Parser[List[Expression]] | |
congregation = pmap( | |
_congregation_extract, | |
chain( | |
lexeme(exactly('(')), | |
commit(chain( | |
sep_by(expression, lexeme(exactly(','))), | |
lexeme(exactly(')')))) | |
)) | |
def _callout_extract(t): | |
return { kv[0]: kv[2] for kv in t[1][0] } | |
callout: Parser[Dict[str, Expression]] | |
callout = pmap( | |
_callout_extract, | |
chain( | |
lexeme(exactly('{')), | |
commit(chain( | |
sep_by(chain( | |
lexeme(string), | |
lexeme(exactly('!')), | |
expression | |
), | |
lexeme(exactly('&'))), | |
lexeme(exactly('}')) | |
)) | |
)) | |
# since `lexeme` skips trailing whitespace, we need to allow leading whitespace | |
# before a document explicitly, then ensure end-of-input | |
document: Parser[Expression] | |
document = pmap(second, chain(whitespace, only(expression))) | |
def main(argv: List[str]) -> int: | |
for line in sys.stdin: | |
res = document(line.rstrip(), ParserState()) | |
if isinstance(res, ParseError): | |
print(f"{' ' * res.pos}^ expected: {res.expected}", file=sys.stderr) | |
else: | |
x, _ = res | |
print(f'> {x}') | |
return 0 | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv)) |
# Copyright 2020 Derrick W. Turk / terminus data science, LLC | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import sys | |
from enum import Enum, auto | |
from typing import Callable, Dict, List, NamedTuple, Tuple, TypeVar, Union | |
class ParserState(NamedTuple): | |
input: str | |
pos: int = 0 | |
class ParseError(NamedTuple): | |
expected: str | |
pos: int | |
# allow backtracking on failure (True), or commit to this error (False)? | |
backtrack: bool = True | |
_T = TypeVar('_T') | |
ParseResult = Union[Tuple[_T, ParserState], ParseError] | |
# Parser = Callable[[ParserState], ParseResult[_T]] | |
# (useless, because we can't write | |
# p : Parser[int] | |
# def p(state: ParserState) -> ParseResult[int]: | |
# ... | |
# ) | |
def whitespace(state: ParserState) -> Tuple[None, ParserState]: | |
pos = state.pos | |
while pos < len(state.input) and state.input[pos].isspace(): | |
pos += 1 | |
return (None, ParserState(state.input, pos)) | |
def integer(state: ParserState) -> ParseResult[int]: | |
_, state = whitespace(state) | |
pos = state.pos | |
if not state.input[pos].isdigit(): | |
return ParseError('integer', pos) | |
num = '' | |
while pos < len(state.input) and state.input[pos].isdigit(): | |
num += state.input[pos] | |
pos += 1 | |
return (int(num), ParserState(state.input, pos)) | |
def string(state: ParserState) -> ParseResult[str]: | |
_, state = whitespace(state) | |
pos = state.pos | |
if state.input[pos:pos+2] != '<<': | |
return ParseError('<<', pos) | |
pos += 2 | |
text = '' | |
while pos < len(state.input): | |
if state.input[pos] == '^': | |
if pos + 1 >= len(state.input): | |
return ParseError('any character', pos + 1, backtrack = False) | |
text += state.input[pos+1] | |
pos += 2 | |
elif state.input[pos:pos+2] == '>>': | |
pos += 2 | |
return (text, ParserState(state.input, pos)) | |
else: | |
text += state.input[pos] | |
pos += 1 | |
return ParseError('>>', pos, backtrack = False) | |
class TriBool(Enum): | |
FALSE = auto() | |
TRUE = auto() | |
FILENOTFOUND = auto() | |
def tribool(state: ParserState) -> ParseResult[TriBool]: | |
_, state = whitespace(state) | |
pos = state.pos | |
if state.input[pos:pos+5] == 'False': | |
pos += 5 | |
return (TriBool.FALSE, ParserState(state.input, pos)) | |
if state.input[pos:pos+4] == 'True': | |
pos += 4 | |
return (TriBool.TRUE, ParserState(state.input, pos)) | |
if state.input[pos:pos+12] == 'FileNotFound': | |
pos += 12 | |
return (TriBool.FILENOTFOUND, ParserState(state.input, pos)) | |
return ParseError('tribool', pos) | |
# there's no way to write a recursive type alias: | |
# we'd prefer | |
# Expression = Union[int, str, TriBool, List[Expression], Dict[str, Expression]] | |
# but we can't, so we'll wrap it in a class | |
class Expression: | |
# type alias for what we can wrap into an expression | |
Wrappable = Union[ | |
int, | |
str, | |
TriBool, | |
List['Expression'], | |
Dict[str, 'Expression'] | |
] | |
def __init__(self, value: Wrappable): | |
self.value = value | |
def __repr__(self): | |
return f'Expression({repr(self.value)})' | |
def congregation(state: ParserState) -> ParseResult[List[Expression]]: | |
_, state = whitespace(state) | |
if state.pos >= len(state.input) or state.input[state.pos] != '(': | |
return ParseError('(', state.pos) | |
state = ParserState(state.input, state.pos + 1) | |
_, state = whitespace(state) | |
exprs: List[Expression] = list() | |
# the first expression (no comma) | |
expr_res = expression(state) | |
if isinstance(expr_res, ParseError): | |
if not expr_res.backtrack: | |
return expr_res | |
if state.pos < len(state.input) and state.input[state.pos] == ')': | |
return (exprs, ParserState(state.input, state.pos + 1)) | |
else: | |
return ParseError('expression or )', state.pos, backtrack = False) | |
expr, state = expr_res | |
exprs.append(expr) | |
while True: | |
_, state = whitespace(state) | |
pos = state.pos | |
if pos < len(state.input) and state.input[pos] == ')': | |
pos += 1 | |
return (exprs, ParserState(state.input, pos)) | |
if pos < len(state.input) and state.input[pos] == ',': | |
state = ParserState(state.input, pos + 1) | |
_, state = whitespace(state) | |
expr_res = expression(state) | |
if isinstance(expr_res, ParseError): | |
return expr_res._replace(backtrack = False) | |
expr, state = expr_res | |
exprs.append(expr) | |
else: | |
return ParseError(', or )', pos, backtrack = False) | |
def callout(state: ParserState) -> ParseResult[Dict[str, Expression]]: | |
_, state = whitespace(state) | |
if state.pos >= len(state.input) or state.input[state.pos] != '{': | |
return ParseError('{', state.pos) | |
state = ParserState(state.input, state.pos + 1) | |
_, state = whitespace(state) | |
exprs: Dict[str, Expression] = dict() | |
# the first mapping (no &) | |
label_res = string(state) | |
if isinstance(label_res, ParseError): | |
if not label_res.backtrack: | |
return label_res | |
if state.pos < len(state.input) and state.input[state.pos] == '}': | |
return (exprs, ParserState(state.input, state.pos + 1)) | |
else: | |
return ParseError('string or }', state.pos, backtrack = False) | |
label, state = label_res | |
_, state = whitespace(state) | |
if state.pos < len(state.input) and state.input[state.pos] == '!': | |
state = ParserState(state.input, state.pos + 1) | |
_, state = whitespace(state) | |
else: | |
return ParseError('!', state.pos, backtrack = False) | |
_, state = whitespace(state) | |
expr_res = expression(state) | |
if isinstance(expr_res, ParseError): | |
return expr_res._replace(backtrack = False) | |
expr, state = expr_res | |
exprs[label] = expr | |
while True: | |
_, state = whitespace(state) | |
pos = state.pos | |
if pos < len(state.input) and state.input[pos] == '}': | |
pos += 1 | |
return (exprs, ParserState(state.input, pos)) | |
if pos < len(state.input) and state.input[pos] == '&': | |
state = ParserState(state.input, pos + 1) | |
_, state = whitespace(state) | |
label_res = string(state) | |
if isinstance(label_res, ParseError): | |
return label_res._replace(backtrack = False) | |
label, state = label_res | |
_, state = whitespace(state) | |
if state.pos < len(state.input) and state.input[state.pos] == '!': | |
state = ParserState(state.input, state.pos + 1) | |
_, state = whitespace(state) | |
else: | |
return ParseError('!', state.pos, backtrack = False) | |
_, state = whitespace(state) | |
expr_res = expression(state) | |
if isinstance(expr_res, ParseError): | |
return expr_res._replace(backtrack = False) | |
expr, state = expr_res | |
exprs[label] = expr | |
else: | |
return ParseError('& or }', pos, backtrack = False) | |
def expression(state: ParserState) -> ParseResult[Expression]: | |
_, state = whitespace(state) | |
# first, try to parse as an integer | |
int_res = integer(state) | |
if not isinstance(int_res, ParseError): | |
num, state = int_res | |
return (Expression(num), state) | |
elif not int_res.backtrack: | |
return int_res | |
# then, as a tribool | |
tribool_res = tribool(state) | |
if not isinstance(tribool_res, ParseError): | |
tb, state = tribool_res | |
return (Expression(tb), state) | |
elif not tribool_res.backtrack: | |
return tribool_res | |
# then, as a string | |
str_res = string(state) | |
if not isinstance(str_res, ParseError): | |
text, state = str_res | |
return (Expression(text), state) | |
elif not str_res.backtrack: | |
return str_res | |
# then, as a congregation | |
cong_res = congregation(state) | |
if not isinstance(cong_res, ParseError): | |
cong, state = cong_res | |
return (Expression(cong), state) | |
elif not cong_res.backtrack: | |
return cong_res | |
# finally, as a callout | |
call_res = callout(state) | |
if not isinstance(call_res, ParseError): | |
call, state = call_res | |
return (Expression(call), state) | |
elif not call_res.backtrack: | |
return call_res | |
return ParseError('expression', state.pos) | |
# one, and only one, expression | |
def document(state: ParserState) -> ParseResult[Expression]: | |
res = expression(state) | |
if isinstance(res, ParseError): | |
return res | |
expr, state = res | |
_, state = whitespace(state) | |
if state.pos != len(state.input): | |
return ParseError('end of document', state.pos) | |
return expr, state | |
def main(argv: List[str]) -> int: | |
for line in sys.stdin: | |
res = document(ParserState(line, 0)) | |
if isinstance(res, ParseError): | |
print(f'{" " * res.pos}^ expected {res.expected}', file=sys.stderr) | |
continue | |
parsed, _ = res | |
print(parsed) | |
return 0 | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv)) |
Apache License | |
Version 2.0, January 2004 | |
http://www.apache.org/licenses/ | |
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |
1. Definitions. | |
"License" shall mean the terms and conditions for use, reproduction, | |
and distribution as defined by Sections 1 through 9 of this document. | |
"Licensor" shall mean the copyright owner or entity authorized by | |
the copyright owner that is granting the License. | |
"Legal Entity" shall mean the union of the acting entity and all | |
other entities that control, are controlled by, or are under common | |
control with that entity. For the purposes of this definition, | |
"control" means (i) the power, direct or indirect, to cause the | |
direction or management of such entity, whether by contract or | |
otherwise, or (ii) ownership of fifty percent (50%) or more of the | |
outstanding shares, or (iii) beneficial ownership of such entity. | |
"You" (or "Your") shall mean an individual or Legal Entity | |
exercising permissions granted by this License. | |
"Source" form shall mean the preferred form for making modifications, | |
including but not limited to software source code, documentation | |
source, and configuration files. | |
"Object" form shall mean any form resulting from mechanical | |
transformation or translation of a Source form, including but | |
not limited to compiled object code, generated documentation, | |
and conversions to other media types. | |
"Work" shall mean the work of authorship, whether in Source or | |
Object form, made available under the License, as indicated by a | |
copyright notice that is included in or attached to the work | |
(an example is provided in the Appendix below). | |
"Derivative Works" shall mean any work, whether in Source or Object | |
form, that is based on (or derived from) the Work and for which the | |
editorial revisions, annotations, elaborations, or other modifications | |
represent, as a whole, an original work of authorship. For the purposes | |
of this License, Derivative Works shall not include works that remain | |
separable from, or merely link (or bind by name) to the interfaces of, | |
the Work and Derivative Works thereof. | |
"Contribution" shall mean any work of authorship, including | |
the original version of the Work and any modifications or additions | |
to that Work or Derivative Works thereof, that is intentionally | |
submitted to Licensor for inclusion in the Work by the copyright owner | |
or by an individual or Legal Entity authorized to submit on behalf of | |
the copyright owner. For the purposes of this definition, "submitted" | |
means any form of electronic, verbal, or written communication sent | |
to the Licensor or its representatives, including but not limited to | |
communication on electronic mailing lists, source code control systems, | |
and issue tracking systems that are managed by, or on behalf of, the | |
Licensor for the purpose of discussing and improving the Work, but | |
excluding communication that is conspicuously marked or otherwise | |
designated in writing by the copyright owner as "Not a Contribution." | |
"Contributor" shall mean Licensor and any individual or Legal Entity | |
on behalf of whom a Contribution has been received by Licensor and | |
subsequently incorporated within the Work. | |
2. Grant of Copyright License. Subject to the terms and conditions of | |
this License, each Contributor hereby grants to You a perpetual, | |
worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |
copyright license to reproduce, prepare Derivative Works of, | |
publicly display, publicly perform, sublicense, and distribute the | |
Work and such Derivative Works in Source or Object form. | |
3. Grant of Patent License. Subject to the terms and conditions of | |
this License, each Contributor hereby grants to You a perpetual, | |
worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |
(except as stated in this section) patent license to make, have made, | |
use, offer to sell, sell, import, and otherwise transfer the Work, | |
where such license applies only to those patent claims licensable | |
by such Contributor that are necessarily infringed by their | |
Contribution(s) alone or by combination of their Contribution(s) | |
with the Work to which such Contribution(s) was submitted. If You | |
institute patent litigation against any entity (including a | |
cross-claim or counterclaim in a lawsuit) alleging that the Work | |
or a Contribution incorporated within the Work constitutes direct | |
or contributory patent infringement, then any patent licenses | |
granted to You under this License for that Work shall terminate | |
as of the date such litigation is filed. | |
4. Redistribution. You may reproduce and distribute copies of the | |
Work or Derivative Works thereof in any medium, with or without | |
modifications, and in Source or Object form, provided that You | |
meet the following conditions: | |
(a) You must give any other recipients of the Work or | |
Derivative Works a copy of this License; and | |
(b) You must cause any modified files to carry prominent notices | |
stating that You changed the files; and | |
(c) You must retain, in the Source form of any Derivative Works | |
that You distribute, all copyright, patent, trademark, and | |
attribution notices from the Source form of the Work, | |
excluding those notices that do not pertain to any part of | |
the Derivative Works; and | |
(d) If the Work includes a "NOTICE" text file as part of its | |
distribution, then any Derivative Works that You distribute must | |
include a readable copy of the attribution notices contained | |
within such NOTICE file, excluding those notices that do not | |
pertain to any part of the Derivative Works, in at least one | |
of the following places: within a NOTICE text file distributed | |
as part of the Derivative Works; within the Source form or | |
documentation, if provided along with the Derivative Works; or, | |
within a display generated by the Derivative Works, if and | |
wherever such third-party notices normally appear. The contents | |
of the NOTICE file are for informational purposes only and | |
do not modify the License. You may add Your own attribution | |
notices within Derivative Works that You distribute, alongside | |
or as an addendum to the NOTICE text from the Work, provided | |
that such additional attribution notices cannot be construed | |
as modifying the License. | |
You may add Your own copyright statement to Your modifications and | |
may provide additional or different license terms and conditions | |
for use, reproduction, or distribution of Your modifications, or | |
for any such Derivative Works as a whole, provided Your use, | |
reproduction, and distribution of the Work otherwise complies with | |
the conditions stated in this License. | |
5. Submission of Contributions. Unless You explicitly state otherwise, | |
any Contribution intentionally submitted for inclusion in the Work | |
by You to the Licensor shall be under the terms and conditions of | |
this License, without any additional terms or conditions. | |
Notwithstanding the above, nothing herein shall supersede or modify | |
the terms of any separate license agreement you may have executed | |
with Licensor regarding such Contributions. | |
6. Trademarks. This License does not grant permission to use the trade | |
names, trademarks, service marks, or product names of the Licensor, | |
except as required for reasonable and customary use in describing the | |
origin of the Work and reproducing the content of the NOTICE file. | |
7. Disclaimer of Warranty. Unless required by applicable law or | |
agreed to in writing, Licensor provides the Work (and each | |
Contributor provides its Contributions) on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |
implied, including, without limitation, any warranties or conditions | |
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |
PARTICULAR PURPOSE. You are solely responsible for determining the | |
appropriateness of using or redistributing the Work and assume any | |
risks associated with Your exercise of permissions under this License. | |
8. Limitation of Liability. In no event and under no legal theory, | |
whether in tort (including negligence), contract, or otherwise, | |
unless required by applicable law (such as deliberate and grossly | |
negligent acts) or agreed to in writing, shall any Contributor be | |
liable to You for damages, including any direct, indirect, special, | |
incidental, or consequential damages of any character arising as a | |
result of this License or out of the use or inability to use the | |
Work (including but not limited to damages for loss of goodwill, | |
work stoppage, computer failure or malfunction, or any and all | |
other commercial damages or losses), even if such Contributor | |
has been advised of the possibility of such damages. | |
9. Accepting Warranty or Additional Liability. While redistributing | |
the Work or Derivative Works thereof, You may choose to offer, | |
and charge a fee for, acceptance of support, warranty, indemnity, | |
or other liability obligations and/or rights consistent with this | |
License. However, in accepting such obligations, You may act only | |
on Your own behalf and on Your sole responsibility, not on behalf | |
of any other Contributor, and only if You agree to indemnify, | |
defend, and hold each Contributor harmless for any liability | |
incurred by, or claims asserted against, such Contributor by reason | |
of your accepting any such warranty or additional liability. | |
END OF TERMS AND CONDITIONS | |
APPENDIX: How to apply the Apache License to your work. | |
To apply the Apache License to your work, attach the following | |
boilerplate notice, with the fields enclosed by brackets "[]" | |
replaced with your own identifying information. (Don't include | |
the brackets!) The text should be enclosed in the appropriate | |
comment syntax for the file format. We also recommend that a | |
file or class name and description of purpose be included on the | |
same "printed page" as the copyright notice for easier | |
identification within third-party archives. | |
Copyright [yyyy] [name of copyright owner] | |
Licensed under the Apache License, Version 2.0 (the "License"); | |
you may not use this file except in compliance with the License. | |
You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment