Skip to content

Instantly share code, notes, and snippets.

@niw
Created July 13, 2023 06:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niw/9a6c29b1b42e7931a97f6506dcd118a5 to your computer and use it in GitHub Desktop.
Save niw/9a6c29b1b42e7931a97f6506dcd118a5 to your computer and use it in GitHub Desktop.
Simple parser combinator, lexer, paresr and evaluator for double value math for simple logic configuration
import Foundation
extension String: Error {}
// MARK: - Parser Combinator
typealias ParserFunction<Element, Output> =
(any Collection<Element>) throws -> (Output, any Collection<Element>)
protocol Parser<Element, Output> {
associatedtype Element
associatedtype Output
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>)
}
struct FunctionParser<Element, Output>: Parser {
var function: ParserFunction<Element, Output>
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>) {
try function(input)
}
}
func parser<Element, Output>(
_ function: @escaping ParserFunction<Element, Output>
) -> some Parser<Element, Output> {
FunctionParser(function: function)
}
struct ResultParser<Element, Output>: Parser {
var output: Output
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>) {
(output, input)
}
}
func result<Element, Output>(_ output: Output) -> some Parser<Element, Output> {
ResultParser(output: output)
}
extension Collection {
var tail: any Collection<Element> {
dropFirst()
}
}
struct ConsumeParser<Element>: Parser {
func apply(_ input: some Collection<Element>) throws -> (Element, any Collection<Element>) {
guard let element = input.first else {
throw "No element."
}
return (element, input.tail)
}
}
func consume<Element>() -> some Parser<Element, Element> {
ConsumeParser()
}
extension Parser {
func bind<T>(_ factory: @escaping (Output) throws -> some Parser<Element, T>) -> some Parser<Element, T> {
parser { input in
let (output, remaining) = try apply(input)
let parser = try factory(output)
return try parser.apply(remaining)
}
}
func satisfy(_ condition: @escaping (Output) throws -> Bool) -> some Parser<Element, Output> {
bind { output in
guard try condition(output) else {
throw "Not satisfied."
}
// Type can't be inferred here if it is `result(output)` somehow.
return ResultParser<Element, Output>(output: output)
}
}
func map<T>(_ transform: @escaping (Output) throws -> T) -> some Parser<Element, T> {
bind { output in
result(try transform(output))
}
}
func or(_ other: @escaping () -> some Parser<Element, Output>) -> some Parser<Element, Output> {
parser { input in
do {
return try apply(input)
} catch {
return try other().apply(input)
}
}
}
}
extension Parser {
typealias RepeatParser = Parser<Element, any Collection<Output>>
var one: some RepeatParser {
map { output in
[output]
}
}
var zeroOrOne: some RepeatParser {
one.or { result([]) }
}
var zeroOrMore: some RepeatParser {
parser { input in
var outputs = [Output]()
var nextInput = input
do {
while true {
let (output, remaining) = try apply(nextInput)
outputs.append(output)
nextInput = remaining
}
} catch {
}
return (outputs, nextInput)
}
}
var oneOrMore: some RepeatParser {
one.bind { first in
zeroOrMore.bind { others in
result(Array(first) + Array(others))
}
}
}
}
extension CharacterSet {
var parser: some Parser<Unicode.Scalar, Unicode.Scalar> {
consume().satisfy { unicodeScalar in
contains(unicodeScalar)
}
}
}
extension String {
var characterSet: CharacterSet {
CharacterSet(charactersIn: self)
}
}
extension Unicode.Scalar {
var parser: some Parser<Unicode.Scalar, Unicode.Scalar> {
consume().satisfy { unicodeScalar in
unicodeScalar == self
}
}
}
func or<Element, Output>(_ parsers: any Parser<Element, Output>...) -> some Parser<Element, Output> {
parser { input in
for parser in parsers {
do {
return try parser.apply(input)
} catch {
}
}
throw "Nothing matched"
}
}
func sequence<Element, Output>(_ parsers: any Parser<Element, any Collection<Output>>...) -> some Parser<Element, any Collection<Output>> {
parser { input in
var outputs = [Output]()
var nextInput = input
for parser in parsers {
let (output, remaining) = try parser.apply(nextInput)
outputs.append(contentsOf: output)
nextInput = remaining
}
return (outputs, nextInput)
}
}
// MARK: - Tokenizer
enum Token: Equatable {
case name(String)
case number(Double)
case plusOperator
case minusOperator
case multiplyOperator
case divideOperator
case leftParentheses
case rightParentheses
}
typealias TokenParser = Parser<Unicode.Scalar, Token>
let sign = "+-".characterSet.parser
let exponentialIndicator = "eE".characterSet.parser
let digit = CharacterSet.decimalDigits.parser
let zeroDigit = "0".unicodeScalars.first!.parser
let nonZeroDigit = "123456789".characterSet.parser
let negativeSign = "-".unicodeScalars.first!.parser
let period = ".".unicodeScalars.first!.parser
let exponentialPart = sequence(
exponentialIndicator.one,
sign.zeroOrOne,
digit.oneOrMore
)
let fractionPart = sequence(period.one, digit.oneOrMore)
let integerPart = or(
sequence(negativeSign.zeroOrOne, nonZeroDigit.one, digit.zeroOrMore),
sequence(negativeSign.zeroOrOne, zeroDigit.one)
)
let floatPart = or(
sequence(integerPart, fractionPart, exponentialPart),
sequence(integerPart, fractionPart),
sequence(integerPart, exponentialPart)
)
let number = or (
floatPart,
integerPart
)
let numberToken: some TokenParser = number.bind { output in
guard let double = Double(String(String.UnicodeScalarView(output))) else {
throw "Not number expression"
}
return ResultParser<Unicode.Scalar, Token>(output: .number(double))
}
let namePartPrefixCharacter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".characterSet.parser
let namePartCharacter = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".characterSet.parser
let namePart = sequence(namePartPrefixCharacter.one, namePartCharacter.zeroOrMore)
let name = or(
// Not using `sequence` due to recursive `name` call.
namePart.bind { a in
period.one.bind { b in
name.bind { c in
result(Array(a) + Array(b) + Array(c))
}
}
},
namePart
)
let nameToken: some TokenParser = name.bind { output in
ResultParser<Unicode.Scalar, Token>(output: .name(String(String.UnicodeScalarView(output))))
}
let leftParenthesesToken: some TokenParser = "(".unicodeScalars.first!.parser.bind { _ in
result(.leftParentheses)
}
let rightParenthesesToken: some TokenParser = ")".unicodeScalars.first!.parser.bind { _ in
result(.rightParentheses)
}
let plusOperatorToken: some TokenParser = "+".unicodeScalars.first!.parser.bind { _ in
result(.plusOperator)
}
let minusOperatorToken: some TokenParser = "-".unicodeScalars.first!.parser.bind { _ in
result(.minusOperator)
}
let multiplyOperatorToken: some TokenParser = "*".unicodeScalars.first!.parser.bind { _ in
result(.multiplyOperator)
}
let divideOperatorToken: some TokenParser = "/".unicodeScalars.first!.parser.bind { _ in
result(.divideOperator)
}
let whitespace = CharacterSet.whitespacesAndNewlines.parser
func ignoreWhiteSpaces<Output>(then parser: @autoclosure @escaping () -> some Parser<Unicode.Scalar, Output>) -> some Parser<Unicode.Scalar, Output> {
whitespace.zeroOrMore.bind { _ in
parser()
}
}
let tokenizer: some Parser<Unicode.Scalar, any Collection<Token>> = or(
ignoreWhiteSpaces(then: leftParenthesesToken),
ignoreWhiteSpaces(then: rightParenthesesToken),
ignoreWhiteSpaces(then: plusOperatorToken),
ignoreWhiteSpaces(then: minusOperatorToken),
ignoreWhiteSpaces(then: multiplyOperatorToken),
ignoreWhiteSpaces(then: divideOperatorToken),
ignoreWhiteSpaces(then: nameToken),
ignoreWhiteSpaces(then: numberToken)
).oneOrMore
// MARK: - Parser
enum Expression {
case symbol(String)
case number(Double)
indirect case add(Expression, Expression)
indirect case subtract(Expression, Expression)
indirect case multiply(Expression, Expression)
indirect case divide(Expression, Expression)
}
typealias ExpressionParser = Parser<Token, Expression>
extension Token {
var parser: some Parser<Token, Token> {
consume().satisfy { $0 == self }
}
}
let _factor1: some ExpressionParser = Token.leftParentheses.parser.bind { _ in
expression.bind { output in
Token.rightParentheses.parser.bind { _ in
result(output)
}
}
}
let _factor2: some ExpressionParser = consume().bind { token in
switch token {
case .name(let name):
ResultParser<Token, Expression>(output: .symbol(name))
case .number(let number):
ResultParser<Token, Expression>(output: .number(number))
default:
throw "Not a name or a number"
}
}
let factor: some ExpressionParser = or(_factor1, _factor2)
let term: some ExpressionParser = or(
factor.bind { left in
or(
Token.multiplyOperator.parser.bind { _ in
term.bind { right in
ResultParser<Token, Expression>(output: .multiply(left, right))
}
},
Token.divideOperator.parser.bind { _ in
term.bind { right in
ResultParser<Token, Expression>(output: .divide(left, right))
}
}
)
},
factor
)
let expression: some ExpressionParser = or(
term.bind { left in
or(
Token.plusOperator.parser.bind { _ in
expression.bind { right in
ResultParser<Token, Expression>(output: .add(left, right))
}
},
Token.minusOperator.parser.bind { _ in
expression.bind { right in
ResultParser<Token, Expression>(output: .subtract(left, right))
}
}
)
},
term
)
// MARK: - Evaluator
let variables: [String: Double] = [
"cat.a": 3,
"cat.b": 40,
"dog.a": 10,
"dog.b": 20
]
func eval(_ expression: Expression) throws -> Double {
switch expression {
case .symbol(let symbol):
guard let value = variables[symbol] else {
throw "Unknown variable \(symbol)"
}
return value
case .number(let number):
return number
case .add(let left, let right):
return try eval(left) + eval(right)
case .subtract(let left, let right):
return try eval(left) - eval(right)
case .multiply(let left, let right):
return try eval(left) * eval(right)
case .divide(let left, let right):
return try eval(left) / eval(right)
}
}
do {
let (tokens, remaining_unicode_scalars) = try tokenizer.apply("cat.a + cat.b * 6 / 8)".unicodeScalars)
print(tokens)
print(Array(remaining_unicode_scalars))
let (expression, remaining_tokens) = try expression.apply(tokens)
print(expression)
print(Array(remaining_tokens))
print(try eval(expression))
} catch {
print(error)
}
// MARK: - Grammer
/*
// Parser
EXPRESSION:
term PLUS_OPERATOR expression
| term MINUS_OPERATOR expression
| term
TERM:
FACTOR MULTIPLY_OPERATOR TERM
| FACTOR DIVIDE_OPERATOR TERM
| FACTOR
FACTOR:
'(' EXPRESSION ')'
| NUMBER
| NAME
;
// Tokenizer
NAME:
NAME_PART
| NAME_PART '.' NAME
fragment NAME_PART:
[_A-Za-z] [_0-9A-Za-z];
PLUS_OPERATOR:
'+';
MINUS_OPERATOR:
'-';
MULTIPLY_OPERATOR:
'*';
DIVIDE_OPERATOR:
'/';
NUMBER:
INTEGER_PART
| FLOAT_PART
;
fragment INTEGER_PART:
NEGATIVE_SIGN? '0'
| NEGATIVE_SIGN? NON_ZERO_DIGIT DIGIT*
;
fragment FLOAT_PART:
INTEGER_PART FRACTIONAL_PART
| INTEGER_PART EXPONENTIAL_PART
| INTEGER_PART FRACTIONAL_PART EXPONENTIAL_PART
;
fragment FRACTIONAL_PART:
'.' DIGIT+;
fragment EXPONENTIAL_PART:
EXPONENT_INDICATOR SIGN? DIGIT+;
fragment NEGATIVE_SIGN:
'-';
fragment DIGIT:
[0-9];
fragment NON_ZERO_DIGIT:
[1-9];
fragment EXPONENT_INDICATOR:
[eE];
fragment SIGN:
[+-];
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment