Skip to content

Instantly share code, notes, and snippets.

@keyle
Last active December 25, 2015 06:48
Show Gist options
  • Save keyle/6934293 to your computer and use it in GitHub Desktop.
Save keyle/6934293 to your computer and use it in GitHub Desktop.
humble beginnings.
package main
import (
"fmt"
"strings"
)
// Token type tags stored in token.tokentype.
const (
	// Tags emitted while scanning the source text in tokenize.
	END       = "<END>"
	ASSIGN    = "<ASSIGN>"
	WORD      = "<WORD>"
	BEGSTRING = "<BEGSTR>"
	ENDSTRING = "<ENDSTR>"

	// Tags assigned afterwards by lexer when it reclassifies WORD tokens.
	VAR       = "<VAR>"
	LIB       = "<LIB>"
	KEYWORD   = "<KEYWORD>"
	BOOL      = "<BOOL>"
	STRINGVAL = "<STRINGVAL>"
)
// source is the sample program that main feeds to tokenize.
var source = `a = "hello world"
out a`
// token is one lexical unit: a type tag plus the text it carries.
type token struct {
	tokentype string // one of the token type constants, e.g. WORD or END
	content   string // the word text; empty for tokens that carry no text
}

// tokens is the package-level token stream. emit appends to it and lexer
// rewrites the type tags in place.
var tokens []token
/*
lexical analysis - from a blob of text to a stream of tokens (-> illegal characters)
parsing - tokens get parsed to fit the grammar into a tree (-> syntax errors)
semantic analysis - annotated syntax tree (-> type inference issues)
generator - generate intermediate code
optimizer - clean up generated code and optimize
builder - produce a binary
*/
// main tokenizes the embedded sample program and then runs the lexer pass
// over the resulting token stream. Both steps print the tokens slice.
func main() {
tokenize(source)
lexer()
}
// currentWord accumulates the characters of the word being scanned by
// tokenize; assembleWord emits it and resets it to the empty string.
var currentWord string
// tokenize scans blob character by character, appends the raw tokens it
// finds to the package-level tokens slice (via emit) and finally prints that
// slice.
//
// Scanning rules:
//   - a newline always flushes the pending word and emits END, even inside
//     a string, so the stream is guaranteed to finish with an END token
//   - a double quote flushes the pending word, emits BEGSTRING or ENDSTRING
//     and toggles string mode
//   - inside a string every other character (including '=', ';' and
//     whitespace) is kept as part of the string body
//   - outside a string ';' emits END, '=' emits ASSIGN, and space, tab and
//     carriage return only separate words
//   - anything else is appended to the pending word
func tokenize(blob string) {
	// The trailing newline flushes the last word and terminates the stream
	// with END even when the input does not end in a line break.
	blob += "\n"
	inString := false
	// Splitting on "" yields one element per UTF-8 sequence.
	for _, c := range strings.Split(blob, "") {
		switch {
		case c == "\n":
			assembleWord()
			emit(token{END, ""})
		case c == "\"":
			assembleWord()
			if inString {
				emit(token{ENDSTRING, ""})
			} else {
				emit(token{BEGSTRING, ""})
			}
			inString = !inString
		case inString:
			// Operators and separators are literal text inside quotes;
			// tokenizing them would split the string body into several
			// tokens and hide it from the lexer's STRINGVAL detection.
			currentWord += c
		case c == ";":
			assembleWord()
			emit(token{END, ""})
		case c == "=":
			assembleWord()
			emit(token{ASSIGN, ""})
		case c == " " || c == "\t" || c == "\r":
			// Whitespace separates words but produces no token.
			assembleWord()
		default:
			currentWord += c
		}
	}
	fmt.Println(tokens)
}
// assembleWord emits the pending word as a WORD token and clears the
// accumulator. It does nothing when no characters have been collected.
func assembleWord() {
	if currentWord != "" {
		emit(token{tokentype: WORD, content: currentWord})
		currentWord = ""
	}
}
/* **************************************** */
// Word lists consulted by lexer when it classifies WORD tokens.
var (
	// library holds the names that lexer tags as LIB.
	library = []string{"out"}
	// keywords holds the names that lexer tags as KEYWORD.
	keywords = []string{"if", "for", "ret"}
)
// lexer refines the package-level token stream in place, replacing every
// generic WORD tag with a more specific one, and then prints the tokens.
//
// Rules:
//   - a WORD directly followed by ASSIGN becomes VAR
//   - "true" and "false" become BOOL
//   - a name found in keywords becomes KEYWORD; otherwise a name found in
//     library becomes LIB
//   - a single WORD enclosed by BEGSTRING and ENDSTRING becomes STRINGVAL
//   - every remaining WORD is a variable being used and becomes VAR
//
// All lookahead is bounds-checked, so a stream that ends in a WORD or in an
// unfinished string no longer causes an index-out-of-range panic.
func lexer() {
	for i := range tokens {
		switch tokens[i].tokentype {
		case WORD:
			tokens[i].tokentype = classifyWord(i)
		case BEGSTRING:
			// Tag the string body now, ahead of the loop, so that it is no
			// longer a WORD by the time the loop reaches it.
			if i+2 < len(tokens) &&
				tokens[i+1].tokentype == WORD &&
				tokens[i+2].tokentype == ENDSTRING {
				tokens[i+1].tokentype = STRINGVAL
			}
		}
	}
	fmt.Println(tokens)
}

// classifyWord returns the specific type tag for the WORD token at index i.
func classifyWord(i int) string {
	// An assignment target is a variable regardless of its spelling.
	if i+1 < len(tokens) && tokens[i+1].tokentype == ASSIGN {
		return VAR
	}
	word := tokens[i].content
	if word == "true" || word == "false" {
		return BOOL
	}
	// Remaining words default to variables being used. The keyword check
	// runs last, so it wins if a name appears in both lists.
	kind := VAR
	for _, lib := range library {
		if word == lib {
			kind = LIB
		}
	}
	for _, keyw := range keywords {
		if word == keyw {
			kind = KEYWORD
		}
	}
	return kind
}
// emit appends t to the package-level tokens slice.
func emit(t token) {
tokens = append(tokens, t)
}
// pattern is a placeholder for a future lexer / grammar rule (TBD); it is
// not referenced by the visible code.
type pattern struct {
	name  string
	start string
	end   string
	match string
}
/** parsing **/
/** types of statements **/
// add later: goto and assertions
// Statement is an empty placeholder struct for statements in general.
type Statement struct{}

// Assignement is an assignment statement made of a left and a right side.
// The spelling of the name is kept as-is so existing references still work.
type Assignement struct {
	Left  string
	Right string
}

// Call is a call statement identified by the name of its callee.
type Call struct {
	Callee string
}

// Return is a return statement identified by what it returns.
type Return struct {
	Returnee string
}
/** collection of statements **/
// Block is an ordered list of statements, each held as a string.
type Block struct {
	Statements []string
}

// Subroutine is a named block together with its parameter names.
type Subroutine struct {
	name       string
	Block      Block
	parameters []string
}

// Var is a variable identified by its name.
type Var struct {
	name string
}

// Condition wraps the block attached to a conditional.
type Condition struct {
	Block Block
}

// Loop wraps the block attached to a loop.
type Loop struct {
	Block Block
}

// SwitchStatement is a temporary representation of a switch: a condition
// and its cases, all held as strings.
type SwitchStatement struct {
	Cond  string
	Cases []string
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment