Skip to content

Instantly share code, notes, and snippets.

@zaydek-old
Last active August 29, 2019 06:14
Show Gist options
  • Save zaydek-old/e20864abe25ecbad7311ee1f7fed82f3 to your computer and use it in GitHub Desktop.
Save zaydek-old/e20864abe25ecbad7311ee1f7fed82f3 to your computer and use it in GitHub Desktop.
/**
 * Token kinds attached to each lexed span. The string values are what
 * ends up in the `token` field of emitted tokens.
 */
const Token = {
  /** Unset: a non-whitespace span that fits no other kind. */
  UNS: "uns",
  /** Comment. */
  COM: "com",
  /** Keyword. */
  KEY: "key",
  /** Number. */
  NUM: "num",
  /** String. */
  STR: "str",
  /** Punctuation. */
  PUN: "pun",
  /** Function. */
  FUN: "fun",
}
/**
 * Character-level scanner over a source string.
 *
 * The span `value.slice(x1, x2)` is the text of the token currently being
 * built: `x1` is where it starts and `x2` is the read position. Finished
 * tokens are appended to the last entry of `lines`, which holds one array
 * of `{token, value}` objects per source line.
 */
class Lexer {
  /**
   * @param {string} value - Text to scan.
   */
  constructor(value) {
    this.value = value
    this.x1 = 0 // start of the pending token
    this.x2 = 0 // read position (end of the pending token)
    this.width = 0 // size of the last step taken by next(); 0 at end of input
    this.lines = [[]]
  }

  /**
   * Consumes and returns the next character.
   * @returns {string|undefined} The character, or undefined at end of input.
   */
  next() {
    if (this.x2 >= this.value.length) {
      // Nothing was consumed, so a following backup() must not move.
      this.width = 0
      return undefined
    }
    const ch = this.value[this.x2]
    this.width = 1
    this.x2 += this.width
    return ch
  }

  /**
   * Returns the next character without consuming it.
   *
   * Reads the character directly rather than going through next()/backup(),
   * so `width` is left untouched and a later backup() still undoes the most
   * recent next().
   * @returns {string|undefined} The character, or undefined at end of input.
   */
  peek() {
    return this.value[this.x2]
  }

  /**
   * Steps back over the character consumed by the most recent next().
   * Only meaningful once per call to next().
   */
  backup() {
    this.x2 -= this.width
  }

  /**
   * Appends the pending span to the current line as a token of kind `token`
   * and starts a new pending span at the read position.
   * @param {*} token - Token kind stored on the emitted object.
   */
  emit(token) {
    const nth = this.lines.length - 1
    this.lines[nth].push({token, value: this.focus()})
    this.ignore()
  }

  /**
   * Emits the pending span when the character just consumed is a line
   * break: the break is un-read so it is excluded from the token, the token
   * is emitted, a new line is opened, and the break is then skipped.
   * @param {*} token - Token kind stored on the emitted object.
   */
  emit_line(token) {
    this.backup()
    this.emit(token)
    this.lines.push([])
    this.next()
    this.ignore()
  }

  /**
   * @returns {string} Text of the pending span.
   */
  focus() {
    return this.value.slice(this.x1, this.x2)
  }

  /**
   * Discards the pending span by moving its start to the read position.
   */
  ignore() {
    this.x1 = this.x2
  }

  /**
   * Consumes the next character if it is one of the characters in `str`.
   * @param {string} str - Set of acceptable characters.
   * @returns {boolean} True when a character was consumed.
   */
  accept(str) {
    const ch = this.next()
    // Guard end of input explicitly: includes(undefined) would search for
    // the text "undefined" instead of failing.
    if (ch !== undefined && str.includes(ch)) {
      return true
    }
    this.backup()
    return false
  }

  /**
   * Consumes characters for as long as they are in `str`.
   * @param {string} str - Set of acceptable characters.
   */
  accept_run(str) {
    while (this.accept(str)) {
      // no op
    }
  }
}
// Lookup table of Go keywords, predeclared types, predeclared constants and
// builtin functions; every listed word maps to `true`.
// The table has no prototype, so names inherited from Object.prototype
// (for example "constructor" or "toString") are not reported as keywords.
const key_map = Object.create(null)
;(function () {
  const keys = "break default func interface select case defer go map struct chan else goto package switch const fallthrough if range type continue for import return var bool byte complex64 complex128 error float32 float64 int int8 int16 int32 int64 rune string uint uint8 uint16 uint32 uint64 uintptr true false iota nil append cap close complex copy delete imag len make new panic print println real recover"
  for (const key of keys.split(" ")) {
    key_map[key] = true
  }
}())
/**
 * Runs the lexer over `value` and passes the resulting token lines to
 * `parse`.
 *
 * NOTE(review): `parse` is defined outside this view; its return value is
 * forwarded unchanged.
 *
 * @param {string} value - Source text to tokenize (the keyword table in this
 *   file targets Go).
 * @returns {*} Whatever `parse` returns for the lexed lines.
 */
function parse_go(value) {
  const lines = lex(value)
  return parse(lines)
}
/**
 * Splits Go source text into lines of classified tokens.
 *
 * Each pass of the main loop consumes one character, decides from it which
 * kind of token starts there, consumes the rest of that token and emits it.
 * Multi-line tokens (block comments, raw strings) are emitted as one token
 * per source line.
 *
 * @param {string} value - Source text.
 * @returns {Array<Array<{token: (string|number), value: string}>>} One array
 *   per source line. `token` is a `Token` value, or `0` for a run of
 *   whitespace.
 */
function lex(value) {
  const ident_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
  const punct_chars = "!%&()*+,-./:;<=>[]^{|}"
  const lexer = new Lexer(value)
  let ch
  while ((ch = lexer.next())) {
    // 0 stays in place for whitespace runs, which have no Token entry.
    let token = 0
    if (ch === "/" && (lexer.peek() === "/" || lexer.peek() === "*")) {
      // comment
      ch = lexer.next()
      if (ch === "/") {
        // Line comment: runs up to, but not including, the line break.
        while ((ch = lexer.next())) {
          if (ch === "\n") {
            lexer.backup()
            break
          }
        }
      } else {
        // Block comment: emitted line by line until the closing "*/".
        while ((ch = lexer.next())) {
          if (ch === "*" && lexer.peek() === "/") {
            lexer.next()
            break
          }
          if (ch === "\n") {
            lexer.emit_line(Token.COM)
          }
        }
      }
      token = Token.COM
    } else if (ch === " " || ch === "\t" || ch === "\n") {
      // whitespace
      if (lexer.x2 > 1 && ch === "\n") {
        // A line break opens a new output line and is itself dropped.
        lexer.lines.push([])
        lexer.ignore()
      } else {
        // Spaces and tabs, or a line break that is the very first character
        // of the input: collected into a whitespace token.
        lexer.accept_run(" \t")
      }
    } else if ((ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") || ch === "_") {
      // keyword or function
      lexer.accept_run(ident_chars)
      // Compare against true so that names inherited from Object.prototype
      // (e.g. "constructor") are not mistaken for keywords.
      if (key_map[lexer.focus()] === true) {
        token = Token.KEY
      } else {
        // Look past any spaces for "(" to spot a call or declaration, then
        // restore the read position so the spaces are not part of the token.
        const end = lexer.x2
        lexer.accept_run(" ")
        token = lexer.peek() === "(" ? Token.FUN : Token.UNS
        lexer.x2 = end
      }
    } else if (ch === "'" || ch === "\"" || ch === "`") {
      // string
      const quote = ch
      while ((ch = lexer.next())) {
        if (quote !== "`" && ch === "\\") {
          // A backslash escapes whatever follows, so both \" and \\ are
          // skipped as a pair. A line break is left alone so that it still
          // terminates the unfinished string below.
          const escaped = lexer.peek()
          if (escaped !== undefined && escaped !== "\n") {
            lexer.next()
          }
        } else if (quote === "`" && ch === "\n") {
          // Raw strings may span lines; emit one token per line.
          lexer.emit_line(Token.STR)
        } else if (ch === quote || ch === "\n") {
          // Closing quote, or an unterminated string ending at the line break.
          if (ch === "\n") {
            lexer.backup()
          }
          break
        }
      }
      token = Token.STR
    } else if (ch >= "0" && ch <= "9") {
      // number
      let digits = "0123456789"
      // The leading digit is already consumed, so test it directly.
      if (ch === "0" && lexer.accept("xX")) {
        digits += "abcdefABCDEF"
      }
      lexer.accept_run(digits)
      if (lexer.accept(".")) {
        lexer.accept_run(digits)
      }
      if (lexer.accept("eE")) {
        // The sign of an exponent is optional.
        lexer.accept("-+")
        lexer.accept_run("0123456789")
      }
      // Imaginary suffix.
      lexer.accept("i")
      token = Token.NUM
    } else if (punct_chars.includes(ch)) {
      // punctuation
      lexer.accept_run(punct_chars)
      token = Token.PUN
    } else {
      // not whitespace: anything else, up to the next whitespace character
      while ((ch = lexer.next())) {
        if (ch === " " || ch === "\t" || ch === "\n") {
          lexer.backup()
          break
        }
      }
      token = Token.UNS
    }
    if (lexer.x1 < lexer.x2) {
      lexer.emit(token)
    }
  }
  return lexer.lines
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment