Skip to content

Instantly share code, notes, and snippets.

@radex
Last active February 15, 2017 13:08
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save radex/20ccd14da08d56f47074 to your computer and use it in GitHub Desktop.
Save radex/20ccd14da08d56f47074 to your computer and use it in GitHub Desktop.
Wrote a little lexer/tokenizer for fun. (Warning: I have no idea what I'm doing)
import Foundation
/// A read cursor over a piece of text: the full input plus the offset at
/// which the next match has to begin.
struct Stream {
    /// The complete input text.
    let string: NSString
    /// Offset into `string` of the first character not consumed yet.
    var position: Int

    /// The range covering everything from `position` to the end of `string`.
    var matchingRange: NSRange {
        let remaining = string.length - position
        return NSMakeRange(position, remaining)
    }
}
/// A minimal last-in, first-out container backed by an array.
struct Stack<T> {
    /// The stored elements, bottom first and top last.
    var array: [T] = []

    /// The element on top of the stack, or `nil` when the stack is empty.
    var tip: T? {
        return array.last
    }

    /// Puts `x` on top of the stack.
    mutating func push(x: T) {
        array.append(x)
    }

    /// Removes the top element and returns it; returns `nil` (and changes
    /// nothing) when the stack is empty.
    mutating func pop() -> T? {
        if array.isEmpty {
            return nil
        }
        return array.removeLast()
    }
}
/// Tries to match `pattern` against `stream`, starting exactly at the
/// stream's current position.
///
/// The pattern is wrapped in a non-capturing group before the `^` anchor is
/// prepended, so the anchor applies to the pattern as a whole. With a bare
/// `"^" + pattern`, a top-level alternation such as `a|b` would become
/// `^a|b`: only the first branch would be anchored and the second could
/// match somewhere later in the input.
///
/// - parameter pattern: Regular-expression source, without a leading anchor.
/// - parameter stream: The text to search and the position the match must start at.
/// - returns: The matched text, or `nil` when `pattern` does not match at
///   the current position.
func matchRegexAt(#pattern: String, stream: Stream) -> String? {
    // An invalid pattern is treated as a programming error: the forced
    // unwrap stops the program instead of reporting "no match".
    let regex = NSRegularExpression(pattern: "^(?:" + pattern + ")", options: nil, error: nil)!
    // The search is limited to the unread part of the input.
    let match = regex.firstMatchInString(stream.string, options: nil, range: stream.matchingRange)
    if let range = match?.rangeAtIndex(0) {
        return stream.string.substringWithRange(range)
    } else {
        return nil
    }
}
/// The state-stack transition a rule asks for once it has matched.
///
/// - `Stay`: leave the state stack untouched.
/// - `Pop`: remove the state on top of the stack.
/// - `Push`: put the state with the given name on top of the stack.
enum NextState {
    case Stay, Pop
    case Push(String)
}
/// A lexing rule. Given the current stream it returns `nil` when it does not
/// apply; otherwise it returns the stream to continue from, an optional
/// token to emit, and the state transition to perform.
typealias Rule = Stream -> (Stream, Token?, NextState)?
/// Builds a rule that keeps the lexer in its current state after matching.
///
/// - parameter regex: Pattern the rule matches at the current position.
/// - parameter tokenizer: Converts the matched text into a token, or returns
///   `nil` when the match should produce no token.
/// - returns: A rule whose state transition is always `.Stay`.
func rule(regex: String, tokenizer: NSString -> Token?) -> Rule {
    let unchanged: NextState = .Stay
    return rule(regex, unchanged, tokenizer)
}
/// Builds a rule from a pattern, a state transition and a token factory.
///
/// - parameter regex: Pattern the rule matches at the current position.
/// - parameter next: State transition reported whenever the rule matches.
/// - parameter tokenizer: Converts the matched text into a token, or returns
///   `nil` when the match should produce no token.
/// - returns: A rule that yields `nil` when `regex` does not match, and
///   otherwise the stream advanced past the match, the tokenizer's result
///   and `next`.
func rule(regex: String, next: NextState, tokenizer: NSString -> Token?) -> Rule {
    return { input in
        if let lexeme: NSString = matchRegexAt(pattern: regex, input) {
            // Same text, cursor moved past the matched characters.
            let advanced = Stream(string: input.string, position: input.position + lexeme.length)
            return (advanced, tokenizer(lexeme), next)
        }
        return nil
    }
}
/// A token with its payload.
///
/// `Swift.String` is spelled out in full because the `String` case shadows
/// the standard-library type name inside this enum.
enum Token: Printable {
    case Symbol(Swift.String)
    case Number(Int)
    case String(Swift.String)

    /// The token kind in upper case followed by its payload; string
    /// payloads are wrapped in double quotes.
    var description: Swift.String {
        switch self {
        case let .Number(number):
            return "NUMBER \(number)"
        case let .String(string):
            return "STRING \"\(string)\""
        case let .Symbol(string):
            return "SYMBOL \(string)"
        }
    }
}
/// A regex-driven lexer with a stack of named states.
///
/// Each state owns an ordered list of rules. The rules of the state on top
/// of the stack are tried in order at the current position; the first one
/// that matches consumes its text, optionally emits a token, and may push or
/// pop a state.
class Lexer {
    /// The input together with the current read position.
    var stream: Stream
    /// Tokens emitted so far, in input order.
    var tokens: [Token] = []
    /// Rulesets keyed by state name.
    var states: [String: [Rule]] = [:]
    /// Names of the active states; the top entry selects the ruleset in use.
    var stack = Stack<String>()

    /// Creates a lexer positioned at the start of `string`, with the state
    /// named "root" as the only entry on the state stack.
    init(string: String) {
        stream = Stream(string: string, position: 0)
        stack.push("root")
    }

    /// Registers (or replaces) the ruleset used while `name` is on top of
    /// the state stack.
    func registerState(name: String, _ ruleset: [Rule]) {
        states[name] = ruleset
    }

    /// Lexes the input from the current position, appending to `tokens`.
    ///
    /// Returns once the input is exhausted. Stops the program with a
    /// descriptive message when no rule matches, when the active state has
    /// no registered ruleset, when the state stack has been emptied, or when
    /// a rule matches the empty string without changing state.
    func lex() {
        while true {
            switch matchRuleset(activeRuleset()) {
            case .Some(.Pop):
                stack.pop()
            case .Some(.Push(let state)):
                stack.push(state)
            default:
                // `nil` means the input is exhausted.
                return
            }
        }
    }

    /// Returns the ruleset of the state on top of the stack, reporting the
    /// exact problem when there is none instead of failing on a nil unwrap.
    private func activeRuleset() -> [Rule] {
        if let name = stack.tip {
            if let ruleset = states[name] {
                return ruleset
            }
            fatalError("No ruleset registered for state \"\(name)\"")
        }
        fatalError("State stack is empty: a rule popped the last remaining state")
    }

    /// Applies `ruleset` repeatedly until a rule requests a state change or
    /// the input is exhausted.
    ///
    /// - returns: The requested `.Push`/`.Pop` transition, or `nil` at end
    ///   of input.
    func matchRuleset(ruleset: [Rule]) -> NextState? {
        while stream.position < stream.string.length {
            let start = stream.position
            if let nextState = matchRulesetOnce(ruleset) {
                switch nextState {
                case .Stay:
                    // A rule that consumed nothing and keeps the same state
                    // would match again forever; fail loudly instead of
                    // spinning.
                    if stream.position == start {
                        fatalError("Rule matched an empty string without changing state")
                    }
                    continue
                default:
                    return nextState
                }
            } else {
                fatalError("No rule matched :(")
            }
        }
        return nil
    }

    /// Tries the rules of `ruleset` in order and applies the first match.
    ///
    /// - returns: The matching rule's transition, or `nil` when no rule
    ///   matches at the current position.
    func matchRulesetOnce(ruleset: [Rule]) -> NextState? {
        for rule in ruleset {
            if let nextState = matchRule(rule) {
                return nextState
            }
        }
        return nil
    }

    /// Applies a single rule at the current position. On a match the stream
    /// is replaced by the one the rule returned and the token, if any, is
    /// appended to `tokens`.
    ///
    /// - returns: The rule's transition, or `nil` when the rule does not match.
    func matchRule(rule: Rule) -> NextState? {
        if let (outputStream, token, next) = rule(stream) {
            stream = outputStream
            if let token = token {
                tokens.append(token)
            }
            return next
        } else {
            return nil
        }
    }
}
// Demo: lex a two-line sample and print every token that was produced.
let lexer = Lexer(string: "blah 0 10 0xFF foo ; comment\n" + "blah 0b0101 blah \" some 0xFF string \" bla ")
lexer.registerState("root", [
    // whitespace
    rule("\\s+", { _ in nil }),
    // comment: everything from ';' up to, but not including, the end of the
    // line. The newline is left for the whitespace rule, so a comment on the
    // last line of the input no longer needs a trailing newline to match.
    rule(";[^\n]*", { _ in nil }),
    // hex numbers
    // The scan target is a UInt32, so literals wider than 32 bits will not
    // round-trip (NOTE(review): confirm NSScanner's overflow behaviour).
    rule("0x[0-9a-fA-F]+", {
        var number: UInt32 = 0
        let scanner = NSScanner(string: $0)
        scanner.scanHexInt(&number)
        return .Number(Int(number))
    }),
    // bin numbers: drop the "0b" prefix and parse the remainder in base 2
    rule("0b[01]+", {
        let bin: NSString = $0.substringFromIndex(2)
        return .Number(strtol(bin.UTF8String, nil, 2))
    }),
    // decimal numbers
    rule("\\d+", { .Number($0.integerValue) }),
    // strings: the opening quote switches to the "string" state
    rule("\"", .Push("string"), { _ in nil }),
    // words
    rule("[a-zA-Z]+", { .Symbol($0) })
])
lexer.registerState("string", [
    // everything up to the next double quote becomes one string token
    rule("[^\\\"]+", { .String($0) }),
    // the closing quote returns to the previous state
    rule("\"", .Pop, { _ in nil })
])
lexer.lex()
for token in lexer.tokens {
    println(token.description)
}
// Bare expression so a playground displays the final read position.
lexer.stream.position
@Mazyod
Copy link

Mazyod commented Jan 30, 2016

This is really cool stuff, and it even takes nested quotations into consideration 👍

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment