Skip to content

Instantly share code, notes, and snippets.

@munificent
Created January 9, 2015 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save munificent/8b5c2240e7364913b6a6 to your computer and use it in GitHub Desktop.
Start of lexer for Wren in Wren
// A single lexed token: a token type (one of the token* type name strings
// declared below) paired with the exact source text it was lexed from.
class Token {
new(type, text) {
_type = type
_text = text
}
// The token type name string, e.g. "leftParen" or "name".
type { _type }
// The raw source text this token covers.
text { _text }
// Debug representation: the source text followed by the type name.
toString { _text + " " + _type }
}
// Token type names. Each Token carries one of these strings as its type.
// Punctuators.
var tokenLeftParen = "leftParen"
var tokenRightParen = "rightParen"
var tokenLeftBracket = "leftBracket"
var tokenRightBracket = "rightBracket"
var tokenLeftBrace = "leftBrace"
var tokenRightBrace = "rightBrace"
var tokenColon = "colon"
var tokenDot = "dot"
var tokenDotDot = "dotDot"
var tokenDotDotDot = "dotDotDot"
var tokenComma = "comma"
var tokenStar = "star"
var tokenSlash = "slash"
var tokenPercent = "percent"
var tokenPlus = "plus"
var tokenMinus = "minus"
var tokenPipe = "pipe"
var tokenPipePipe = "pipePipe"
var tokenAmp = "amp"
var tokenAmpAmp = "ampAmp"
var tokenBang = "bang"
var tokenTilde = "tilde"
var tokenEqual = "equal"
var tokenLess = "less"
var tokenGreater = "greater"
var tokenLessEqual = "lessEqual"
var tokenGreaterEqual = "greaterEqual"
var tokenEqualEqual = "equalEqual"
var tokenBangEqual = "bangEqual"
// Keywords.
var tokenBreak = "break"
var tokenClass = "class"
var tokenElse = "else"
var tokenFalse = "false"
var tokenFor = "for"
var tokenIf = "if"
var tokenIn = "in"
var tokenIs = "is"
var tokenNew = "new"
var tokenNull = "null"
var tokenReturn = "return"
var tokenStatic = "static"
var tokenSuper = "super"
var tokenThis = "this"
var tokenTrue = "true"
var tokenVar = "var"
var tokenWhile = "while"
// Not implemented yet — presumably placeholders for the field and static
// field token types from the C lexer (TODO confirm):
// TOKEN_FIELD
// TOKEN_STATIC_FIELD
// Literals and other token kinds.
var tokenName = "name"
var tokenNumber = "number"
var tokenString = "string"
var tokenLine = "line"
var tokenError = "error"
var tokenEof = "eof"
// Hand-written lexer for Wren source, written in Wren. Tokenizes [source]
// lazily: tokenize returns a fiber that yields one Token per call, ending
// with an "eof" token.
class Lexer {
  new(source) {
    _source = source
    // _start marks the first character of the token being lexed; _current is
    // the next character to consume.
    _start = 0
    _current = 0
  }

  // Returns a new fiber that yields each Token in the source in order,
  // finishing with a token of type tokenEof. Drive it with .call and stop
  // when .isDone becomes true.
  tokenize {
    return new Fiber {
      while (_current < _source.count) {
        skipSpace
        // Trailing whitespace: skipSpace may have consumed the rest of the
        // source. Without this check, peek below would index one past the
        // end of _source.
        if (_current >= _source.count) break
        _start = _current
        // TODO: A map or switch would be nice.
        if (match("(")) {
          makeToken(tokenLeftParen)
        } else if (match(")")) {
          makeToken(tokenRightParen)
        } else if (match("[")) {
          makeToken(tokenLeftBracket)
        } else if (match("]")) {
          makeToken(tokenRightBracket)
        } else if (match("{")) {
          makeToken(tokenLeftBrace)
        } else if (match("}")) {
          makeToken(tokenRightBrace)
        } else if (match(":")) {
          makeToken(tokenColon)
        } else if (match(".")) {
          // ".", "..", and "..." share a prefix, so try the longest last.
          if (match(".")) {
            if (match(".")) {
              makeToken(tokenDotDotDot)
            } else {
              makeToken(tokenDotDot)
            }
          } else {
            makeToken(tokenDot)
          }
        } else if (match(",")) {
          makeToken(tokenComma)
        } else if (match("*")) {
          makeToken(tokenStar)
        } else if (match("/")) {
          makeToken(tokenSlash)
        } else if (match("%")) {
          makeToken(tokenPercent)
        } else if (match("+")) {
          makeToken(tokenPlus)
        } else if (match("-")) {
          makeToken(tokenMinus)
        } else if (match("|")) {
          if (match("|")) {
            makeToken(tokenPipePipe)
          } else {
            makeToken(tokenPipe)
          }
        } else if (match("&")) {
          if (match("&")) {
            makeToken(tokenAmpAmp)
          } else {
            makeToken(tokenAmp)
          }
        } else if (match("!")) {
          if (match("=")) {
            makeToken(tokenBangEqual)
          } else {
            makeToken(tokenBang)
          }
        } else if (match("~")) {
          makeToken(tokenTilde)
        } else if (match("\n")) {
          // Newlines are significant in Wren; emit them as "line" tokens
          // instead of falling through to the error case.
          makeToken(tokenLine)
        } else if (match("=")) {
          if (match("=")) {
            makeToken(tokenEqualEqual)
          } else {
            makeToken(tokenEqual)
          }
        } else if (match("<")) {
          if (match("=")) {
            makeToken(tokenLessEqual)
          } else {
            makeToken(tokenLess)
          }
        } else if (match(">")) {
          if (match("=")) {
            makeToken(tokenGreaterEqual)
          } else {
            makeToken(tokenGreater)
          }
        } else if ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(peek)) {
          // TODO: Better way to compare characters!
          readName
        } else {
          // Unrecognized character: consume it and emit an error token so
          // the lexer always makes progress.
          // TODO: Do something better here.
          advance
          makeToken(tokenError)
        }
      }
      // Emit a zero-width "eof" token so consumers see a definite end marker.
      _start = _current
      makeToken(tokenEof)
    }
  }

  // Advances past the current character.
  advance {
    _current = _current + 1
  }

  // Gets the current character. Callers must ensure _current is in bounds.
  peek { _source[_current] }

  // Consumes the current character if it is [c]. Returns true if consumed.
  match(c) {
    if (_current < _source.count && _source[_current] == c) {
      _current = _current + 1
      return true
    }
    return false
  }

  // Creates a token of [type] from the current character range
  // (_start inclusive to _current exclusive) and yields it from the
  // tokenize fiber.
  makeToken(type) {
    // TODO: Substring method.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }
    Fiber.yield(new Token(type, text))
  }

  // Skips over whitespace characters (spaces and tabs; newlines are
  // significant and lexed as tokens).
  skipSpace {
    while (match(" ") || match("\t")) {
      // Already advanced.
    }
  }

  // Reads an identifier or keyword token starting at _start.
  readName {
    // The first character was validated by the caller; consume it, then any
    // run of identifier characters.
    advance
    while (_current < _source.count && "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789".contains(peek)) {
      advance
    }
    var type = tokenName
    // TODO: Unify with makeToken.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }
    // If the identifier is a reserved word, use the keyword token type.
    if (text == "break") {
      type = tokenBreak
    } else if (text == "class") {
      type = tokenClass
    } else if (text == "else") {
      type = tokenElse
    } else if (text == "false") {
      type = tokenFalse
    } else if (text == "for") {
      type = tokenFor
    } else if (text == "if") {
      type = tokenIf
    } else if (text == "in") {
      type = tokenIn
    } else if (text == "is") {
      type = tokenIs
    } else if (text == "new") {
      type = tokenNew
    } else if (text == "null") {
      type = tokenNull
    } else if (text == "return") {
      type = tokenReturn
    } else if (text == "static") {
      type = tokenStatic
    } else if (text == "super") {
      type = tokenSuper
    } else if (text == "this") {
      type = tokenThis
    } else if (text == "true") {
      type = tokenTrue
    } else if (text == "var") {
      type = tokenVar
    } else if (text == "while") {
      type = tokenWhile
    }
    Fiber.yield(new Token(type, text))
  }
}
// Smoke test: lex a string exercising every punctuator and every keyword,
// printing each token as it is yielded.
var s = "()(([ .foo_BAR123:..,... ]%|||&&& { \t}!~)+-*/=!===<><=>=\n" +
"break class else false for if in is new null return static super this true var while"
var lexer = new Lexer(s)
var tokens = lexer.tokenize
while (true) {
var token = tokens.call
// After the final token is yielded, one more call runs the fiber to
// completion; its result is not a token, so check isDone before printing.
if (tokens.isDone) break
IO.print(token)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment