Skip to content

Instantly share code, notes, and snippets.

@niw
Created July 12, 2023 04:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niw/8ad9cadf7e3b6bb338e1edcf38f5664b to your computer and use it in GitHub Desktop.
Save niw/8ad9cadf7e3b6bb338e1edcf38f5664b to your computer and use it in GitHub Desktop.
Tiny SExpr Interpretor
import Foundation
extension String: Error {}
// MARK: - Evaluation
enum Atom {
case symbol(String)
case number(Double)
}
enum SExpr {
case atom(Atom)
case list(AnyCollection<SExpr>)
}
extension SExpr: ExpressibleByStringLiteral {
typealias StringLiteralType = String
init(stringLiteral value: String) {
self = .atom(.symbol(value))
}
}
extension SExpr: ExpressibleByFloatLiteral {
typealias FloatLiteralType = Double
init(floatLiteral value: Double) {
self = .atom(.number(value))
}
}
extension SExpr: ExpressibleByIntegerLiteral {
typealias IntegerLiteralType = Int
init(integerLiteral value: Int) {
self = .atom(.number(Double(value)))
}
}
extension SExpr: ExpressibleByArrayLiteral {
typealias ArrayLiteralElement = SExpr
init(arrayLiteral elements: SExpr...) {
self = .list(AnyCollection(elements))
}
}
extension SExpr: CustomStringConvertible {
var description: String {
switch self {
case .atom(let atom):
"\(atom)"
case .list(let list):
"[\(list.map{ $0.description }.joined(separator: " "))]"
}
}
}
func eval(_ expr: SExpr) throws -> SExpr {
switch expr {
case .atom:
return expr
case .list(let list):
if case .atom(.symbol(let symbol)) = list.first {
return try apply(symbol, arguments: list.dropFirst())
} else {
throw "Unexpected list \(expr)"
}
}
}
func apply(_ symbol: String, arguments: AnyCollection<SExpr>) throws -> SExpr {
let doubleArguments = try arguments.map { argument in
switch try eval(argument) {
case .atom(.number(let double)):
double
default:
throw "Unexpected type"
}
}
let op: (Double, Double) -> Double = switch symbol {
case "+": (+)
case "-": (-)
case "*": (*)
case "/": (/)
default:
throw "Unknown operator \(symbol)"
}
if let firstArgument = doubleArguments.first {
let restOfArguments = doubleArguments.dropFirst()
return .atom(.number(restOfArguments.reduce(firstArgument) { result, arg in
op(result, arg)
}))
} else {
throw "Unexpected arguments \(arguments)"
}
}
#if false
do {
print(try eval(["+", 1, ["*", 2.4, 3]]))
} catch {
print(error)
}
#endif
// MARK: - Parser Combinator
typealias Parser<Element, Output> =
(AnyCollection<Element>) throws -> (Output, AnyCollection<Element>)
func result<Element, Output>(_ output: Output) -> Parser<Element, Output> {
{ input in
(output, input)
}
}
func consume<Element>() -> Parser<Element, Element> {
{ input in
if let first = input.first {
return (first, AnyCollection(input.dropFirst()))
} else {
throw "Error"
}
}
}
func bind<Element, T, U>(
_ parser: @escaping Parser<Element, T>,
_ factory: @escaping (T) throws -> Parser<Element, U>
) -> Parser<Element, U> {
{ input in
let (output, remainder) = try parser(input)
let parser = try factory(output)
return try parser(remainder)
}
}
#if false
func parserA() -> Parser<Unicode.Scalar, Unicode.Scalar> {
bind(consume()) { output in
switch output {
case "a":
return result("a")
default:
throw "error"
}
}
}
#endif
func satisfy<Element>(
_ condition: @escaping (Element) -> Bool
) -> Parser<Element, Element> {
bind(consume()) { output in
if condition(output) {
return result(output)
} else {
throw "Not satisfied"
}
}
}
// Works only for a single character that can be expressed in a single Unicode scalar.
extension String {
var unicodeScalarParser: Parser<Unicode.Scalar, Unicode.Scalar> {
satisfy { $0 == self.unicodeScalars.first }
}
}
extension CharacterSet {
var parser: Parser<Unicode.Scalar, Unicode.Scalar> {
satisfy { character in
self.contains(character)
}
}
}
func or<Element, Output>(
_ lhs: @escaping Parser<Element, Output>,
_ rhs: @escaping Parser<Element, Output>
) -> Parser<Element, Output> {
{ input in
do {
return try lhs(input)
} catch {
return try rhs(input)
}
}
}
enum Either<T, U> {
case left(T)
case right(U)
}
func either<Element, T, U>(
_ lhs: @escaping Parser<Element, T>,
_ rhs: @escaping Parser<Element, U>
) -> Parser<Element, Either<T, U>> {
{ input in
do {
let (output, remaining) = try lhs(input)
return (.left(output), remaining)
} catch {
let (output, remaining) = try rhs(input)
return (.right(output), remaining)
}
}
}
func oneOrMore<Element, Output>(
_ parser: @escaping Parser<Element, Output>
) -> Parser<Element, AnyCollection<Output>> {
// this should be loop instead.
func recursive(_ buffer: [Output]) -> Parser<Element, AnyCollection<Output>> {
bind(parser) { output in
var buf = buffer
buf.append(output)
return or(recursive(buf), result(AnyCollection(buf)))
}
}
return recursive([])
}
func zeroOrMore<Element, Output>(
_ parser: @escaping Parser<Element, Output>
) -> Parser<Element, AnyCollection<Output>> {
or(oneOrMore(parser), result(AnyCollection([])))
}
#if false
do {
let input = AnyCollection("!123$".unicodeScalars)
let parser = zeroOrMore(or(CharacterSet.alphanumerics.parser, CharacterSet.symbols.parser))
let (output, remaining) = try parser(input)
print(Array(output))
print(Array(remaining))
} catch {
print(error)
}
#endif
// MARK: - Tokenizer
enum SExprToken: Equatable {
case leftParentheses
case rightParentheses
case symbol(String)
case number(Double)
}
extension SExprToken: CustomStringConvertible {
var description: String {
switch self {
case .leftParentheses:
"("
case .rightParentheses:
")"
case .symbol(let string):
string
case .number(let number):
"\(number)"
}
}
}
typealias SExprTokenParser = Parser<Unicode.Scalar, SExprToken>
let leftParenthesesParser: SExprTokenParser = bind("(".unicodeScalarParser) { _ in
result(.leftParentheses)
}
let rightParenthesesParser: SExprTokenParser = bind(")".unicodeScalarParser) { _ in
result(.rightParentheses)
}
let symbolParser: SExprTokenParser = bind(CharacterSet(charactersIn: "+-*/").parser) { character in
result(.symbol(String(character)))
}
let numberParser: SExprTokenParser = bind(oneOrMore(CharacterSet.decimalDigits.parser)) { digits in
let digitsString = String(String.UnicodeScalarView(digits))
guard let double = Double(digitsString) else {
throw "\(digitsString) is not a number"
}
return result(.number(double))
}
typealias SExprTokenizer = Parser<Unicode.Scalar, AnyCollection<SExprToken>>
func ignoreWhiteSpaces<Output>(
then parser: @escaping Parser<Unicode.Scalar, Output>
) -> Parser<Unicode.Scalar, Output> {
bind(zeroOrMore(CharacterSet.whitespacesAndNewlines.parser)) { _ in
parser
}
}
let sexprTokenizer: SExprTokenizer = oneOrMore(
ignoreWhiteSpaces(then:
or(leftParenthesesParser, or(rightParenthesesParser, or(symbolParser, numberParser)))))
#if false
do {
let source = "(+ 1 (* 2 3))"
let (tokens, remaining) = try sexprTokenizer(AnyCollection(source.unicodeScalars))
print(Array(tokens))
print(Array(remaining))
} catch {
print(error)
}
#endif
// MARK: - Parser
typealias SExprParser = Parser<SExprToken, SExpr>
extension SExprToken {
var sexpr: SExpr? {
switch self {
case .symbol(let symbol):
.atom(.symbol(symbol))
case .number(let number):
.atom(.number(number))
default:
nil
}
}
}
func parentheses<Output>(
_ factory: @escaping () -> Parser<SExprToken, Output>
) -> Parser<SExprToken, Output> {
bind(satisfy { $0 == .leftParentheses }) { _ in
bind(factory()) { output in
bind(satisfy { $0 == .rightParentheses }) { _ in
result(output)
}
}
}
}
let atomParser: SExprParser = bind(consume()) { token in
guard let sexpr = token.sexpr else {
throw "Not atom"
}
return result(sexpr)
}
let sexprParser = bind(parentheses { zeroOrMore(or(sexprParser, atomParser)) }) { exprs in
result(.list(exprs))
}
do {
let source = "(+ 1 (* 2 3) (* 4 5))"
let (tokens, _) = try sexprTokenizer(AnyCollection(source.unicodeScalars))
print(Array(tokens))
let (sexpr, _) = try sexprParser(tokens)
print(sexpr)
print(try eval(sexpr))
} catch {
print(error)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment