Skip to content

Instantly share code, notes, and snippets.

@chriseidhof
Last active September 3, 2020 15:17
Show Gist options
  • Save chriseidhof/831c61250ccf3822d0d63f3b706aae84 to your computer and use it in GitHub Desktop.
Save chriseidhof/831c61250ccf3822d0d63f3b706aae84 to your computer and use it in GitHub Desktop.
// Three different lexer implementations. First a "consume everything" lexer, then a "lazy" lexer (that is, an iterator), then a lazy lexer without explicit state.
enum Token {
case heading(level: Int)
case star
case text(String)
}
import Foundation
enum LexState {
case heading(Int)
case other
}
extension Substring {
mutating func lex() -> [Token] {
var tokens: [Token] = []
var state = LexState.other
// In a classical lexer, we need to keep track of state.
func flush() {
switch state {
case .heading(let x):
tokens.append(.heading(level: x))
case .other: ()
}
}
while let next = self.popFirst() {
switch (state, next) {
case (.other, "#"):
state = .heading(1)
case (.heading(let x), "#"):
state = .heading(x + 1)
case (_, "*"):
flush()
tokens.append(.star)
default:
fatalError()
}
}
return tokens
}
}
let source = "###**##"
var rem = source[...]
rem.lex()
// we can model the same lexer as an iterator. we store the state as an instance property.
struct Lexer: IteratorProtocol {
var state: LexState
var remainder: Substring
var buffer: [Token] = []
mutating func flush(adding: Token?) -> Token? {
defer { state = .other }
switch state {
case let .heading(x):
if let y = adding { buffer.append(y) }
return .heading(level: x)
default:
return adding
}
}
mutating func next() -> Token? {
if !buffer.isEmpty {
return buffer.remove(at: 0)
}
while let n = remainder.popFirst() {
switch (state, n) {
case (.other, "#"):
state = .heading(1)
case (.heading(let x), "#"):
state = .heading(x + 1)
case (_, "*"):
return flush(adding: .star)
default:
fatalError()
}
}
return flush(adding: nil)
}
}
let result = Array(AnySequence { Lexer(state: .other, remainder: source[...], buffer: []) })
result
// Instead of having an explicit LexState, we can also use functions to model our current state.
// I got the idea from Rob Pike's https://www.youtube.com/watch?v=HxaD_trXwRE, but as Swift doesn't have coroutines (yet?), we need continuations
struct Cont {
let run: (inout Substring) -> (Token, Cont)?
}
// We can now have a lazy token producer (the consumer asks for next tokens) without having to store explicit state:
struct Lexer2: IteratorProtocol {
var remainder: Substring
var continuation: Cont
init(_ remainder: Substring) {
self.remainder = remainder
self.continuation = Lexer2.startState()
}
mutating func next() -> Token? {
guard let (t, c) = continuation.run(&remainder) else { return nil }
self.continuation = c
return t
}
static func startState() -> Cont {
return Cont { str in
guard let n = str.popFirst() else { return nil }
return self.helper(n).run(&str)
}
}
static func helper(_ n: Character) -> Cont {
return Cont { str in
switch n {
case "#":
return self.heading().run(&str)
case "*":
return (.star, self.startState())
default:
fatalError() // not supported by the language yet
}
}
}
static func heading() -> Cont {
return Cont { str in
var count = 1 // when we get to this function we've seen exactly one "#"
while let n = str.popFirst() {
switch n {
case "#":
count += 1
default:
// here we can pass the already seen `n` to the helper method, which captures that "state" for us
return (.heading(level: count), Lexer2.helper(n))
}
}
return (.heading(level: count), Lexer2.startState())
}
}
}
var l = Array(AnySequence { Lexer2(source[...]) })
l
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment