Skip to content

Instantly share code, notes, and snippets.

@munificent
Created January 9, 2015 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save munificent/8b5c2240e7364913b6a6 to your computer and use it in GitHub Desktop.
Start of lexer for Wren in Wren
// A single lexed token: a token type (one of the token* type name strings
// declared below) paired with the exact source text it was lexed from.
class Token {
new(type, text) {
_type = type
_text = text
}
// The token type name string, e.g. "leftParen" or "name".
type { _type }
// The raw source text this token covers.
text { _text }
// Debug representation: the source text followed by the type name.
toString { _text + " " + _type }
}
// Token type names. Each Token carries one of these strings as its type.
// Punctuators.
var tokenLeftParen = "leftParen"
var tokenRightParen = "rightParen"
var tokenLeftBracket = "leftBracket"
var tokenRightBracket = "rightBracket"
var tokenLeftBrace = "leftBrace"
var tokenRightBrace = "rightBrace"
var tokenColon = "colon"
var tokenDot = "dot"
var tokenDotDot = "dotDot"
var tokenDotDotDot = "dotDotDot"
var tokenComma = "comma"
var tokenStar = "star"
var tokenSlash = "slash"
var tokenPercent = "percent"
var tokenPlus = "plus"
var tokenMinus = "minus"
var tokenPipe = "pipe"
var tokenPipePipe = "pipePipe"
var tokenAmp = "amp"
var tokenAmpAmp = "ampAmp"
var tokenBang = "bang"
var tokenTilde = "tilde"
var tokenEqual = "equal"
var tokenLess = "less"
var tokenGreater = "greater"
var tokenLessEqual = "lessEqual"
var tokenGreaterEqual = "greaterEqual"
var tokenEqualEqual = "equalEqual"
var tokenBangEqual = "bangEqual"
// Keywords.
var tokenBreak = "break"
var tokenClass = "class"
var tokenElse = "else"
var tokenFalse = "false"
var tokenFor = "for"
var tokenIf = "if"
var tokenIn = "in"
var tokenIs = "is"
var tokenNew = "new"
var tokenNull = "null"
var tokenReturn = "return"
var tokenStatic = "static"
var tokenSuper = "super"
var tokenThis = "this"
var tokenTrue = "true"
var tokenVar = "var"
var tokenWhile = "while"
// Not implemented yet — presumably placeholders for the field and static
// field token types from the C lexer (TODO confirm):
// TOKEN_FIELD
// TOKEN_STATIC_FIELD
// Literals and other token kinds.
var tokenName = "name"
var tokenNumber = "number"
var tokenString = "string"
var tokenLine = "line"
var tokenError = "error"
var tokenEof = "eof"
// Hand-written lexer for Wren source, written in Wren. Tokenizes [source]
// lazily: tokenize returns a fiber that yields one Token per call, ending
// with an "eof" token.
class Lexer {
  new(source) {
    _source = source
    // _start marks the first character of the token being lexed; _current is
    // the next character to consume.
    _start = 0
    _current = 0
  }

  // Returns a new fiber that yields each Token in the source in order,
  // finishing with a token of type tokenEof. Drive it with .call and stop
  // when .isDone becomes true.
  tokenize {
    return new Fiber {
      while (_current < _source.count) {
        skipSpace
        // Trailing whitespace: skipSpace may have consumed the rest of the
        // source. Without this check, peek below would index one past the
        // end of _source.
        if (_current >= _source.count) break
        _start = _current
        // TODO: A map or switch would be nice.
        if (match("(")) {
          makeToken(tokenLeftParen)
        } else if (match(")")) {
          makeToken(tokenRightParen)
        } else if (match("[")) {
          makeToken(tokenLeftBracket)
        } else if (match("]")) {
          makeToken(tokenRightBracket)
        } else if (match("{")) {
          makeToken(tokenLeftBrace)
        } else if (match("}")) {
          makeToken(tokenRightBrace)
        } else if (match(":")) {
          makeToken(tokenColon)
        } else if (match(".")) {
          // ".", "..", and "..." share a prefix, so try the longest last.
          if (match(".")) {
            if (match(".")) {
              makeToken(tokenDotDotDot)
            } else {
              makeToken(tokenDotDot)
            }
          } else {
            makeToken(tokenDot)
          }
        } else if (match(",")) {
          makeToken(tokenComma)
        } else if (match("*")) {
          makeToken(tokenStar)
        } else if (match("/")) {
          makeToken(tokenSlash)
        } else if (match("%")) {
          makeToken(tokenPercent)
        } else if (match("+")) {
          makeToken(tokenPlus)
        } else if (match("-")) {
          makeToken(tokenMinus)
        } else if (match("|")) {
          if (match("|")) {
            makeToken(tokenPipePipe)
          } else {
            makeToken(tokenPipe)
          }
        } else if (match("&")) {
          if (match("&")) {
            makeToken(tokenAmpAmp)
          } else {
            makeToken(tokenAmp)
          }
        } else if (match("!")) {
          if (match("=")) {
            makeToken(tokenBangEqual)
          } else {
            makeToken(tokenBang)
          }
        } else if (match("~")) {
          makeToken(tokenTilde)
        } else if (match("\n")) {
          // Newlines are significant in Wren; emit them as "line" tokens
          // instead of falling through to the error case.
          makeToken(tokenLine)
        } else if (match("=")) {
          if (match("=")) {
            makeToken(tokenEqualEqual)
          } else {
            makeToken(tokenEqual)
          }
        } else if (match("<")) {
          if (match("=")) {
            makeToken(tokenLessEqual)
          } else {
            makeToken(tokenLess)
          }
        } else if (match(">")) {
          if (match("=")) {
            makeToken(tokenGreaterEqual)
          } else {
            makeToken(tokenGreater)
          }
        } else if ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(peek)) {
          // TODO: Better way to compare characters!
          readName
        } else {
          // Unrecognized character: consume it and emit an error token so
          // the lexer always makes progress.
          // TODO: Do something better here.
          advance
          makeToken(tokenError)
        }
      }
      // Emit a zero-width "eof" token so consumers see a definite end marker.
      _start = _current
      makeToken(tokenEof)
    }
  }

  // Advances past the current character.
  advance {
    _current = _current + 1
  }

  // Gets the current character. Callers must ensure _current is in bounds.
  peek { _source[_current] }

  // Consumes the current character if it is [c]. Returns true if consumed.
  match(c) {
    if (_current < _source.count && _source[_current] == c) {
      _current = _current + 1
      return true
    }
    return false
  }

  // Creates a token of [type] from the current character range
  // (_start inclusive to _current exclusive) and yields it from the
  // tokenize fiber.
  makeToken(type) {
    // TODO: Substring method.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }
    Fiber.yield(new Token(type, text))
  }

  // Skips over whitespace characters (spaces and tabs; newlines are
  // significant and lexed as tokens).
  skipSpace {
    while (match(" ") || match("\t")) {
      // Already advanced.
    }
  }

  // Reads an identifier or keyword token starting at _start.
  readName {
    // The first character was validated by the caller; consume it, then any
    // run of identifier characters.
    advance
    while (_current < _source.count && "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789".contains(peek)) {
      advance
    }
    var type = tokenName
    // TODO: Unify with makeToken.
    var text = ""
    for (i in _start..._current) {
      text = text + _source[i]
    }
    // If the identifier is a reserved word, use the keyword token type.
    if (text == "break") {
      type = tokenBreak
    } else if (text == "class") {
      type = tokenClass
    } else if (text == "else") {
      type = tokenElse
    } else if (text == "false") {
      type = tokenFalse
    } else if (text == "for") {
      type = tokenFor
    } else if (text == "if") {
      type = tokenIf
    } else if (text == "in") {
      type = tokenIn
    } else if (text == "is") {
      type = tokenIs
    } else if (text == "new") {
      type = tokenNew
    } else if (text == "null") {
      type = tokenNull
    } else if (text == "return") {
      type = tokenReturn
    } else if (text == "static") {
      type = tokenStatic
    } else if (text == "super") {
      type = tokenSuper
    } else if (text == "this") {
      type = tokenThis
    } else if (text == "true") {
      type = tokenTrue
    } else if (text == "var") {
      type = tokenVar
    } else if (text == "while") {
      type = tokenWhile
    }
    Fiber.yield(new Token(type, text))
  }
}
// Smoke test: lex a string exercising every punctuator and every keyword,
// printing each token as it is yielded.
var s = "()(([ .foo_BAR123:..,... ]%|||&&& { \t}!~)+-*/=!===<><=>=\n" +
"break class else false for if in is new null return static super this true var while"
var lexer = new Lexer(s)
var tokens = lexer.tokenize
while (true) {
var token = tokens.call
// After the final token is yielded, one more call runs the fiber to
// completion; its result is not a token, so check isDone before printing.
if (tokens.isDone) break
IO.print(token)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment