Last active
December 25, 2015 06:48
-
-
Save keyle/6934293 to your computer and use it in GitHub Desktop.
humble beginnings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
) | |
// Token type tags. tokenize emits the structural tags (END, ASSIGN,
// BEGSTRING, ENDSTRING) and the generic WORD tag; lexer then refines
// WORD tokens into VAR, LIB, KEYWORD, BOOL or STRINGVAL.
const (
	END = "<END>"             // end of statement (newline or ';')
	ASSIGN = "<ASSIGN>"       // '=' between a variable and its value
	WORD = "<WORD>"           // unclassified identifier/literal text
	VAR = "<VAR>"             // variable (assignment target or reference)
	LIB = "<LIB>"             // built-in library call, see library below
	KEYWORD = "<KEYWORD>"     // reserved word, see keywords below
	BEGSTRING = "<BEGSTR>"    // opening double quote
	ENDSTRING = "<ENDSTR>"    // closing double quote
	BOOL = "<BOOL>"           // literal true / false
	STRINGVAL = "<STRINGVAL>" // word between BEGSTRING and ENDSTRING
)
// source is the hard-coded toy program being compiled: assign a string
// literal to a, then print it via the built-in "out".
var source string = `a = "hello world"
out a`

// tokens is the flat token stream produced by tokenize and
// reclassified in place by lexer.
var tokens []token
// token is one lexical unit: a type tag (one of the constants above)
// plus, for WORD tokens, the literal text it was assembled from.
type token struct {
	tokentype string // tag constant, e.g. WORD, ASSIGN, END
	content   string // raw text; empty for purely structural tokens
}
/*
lexical analysis - from a blob of text to a stream of tokens (-> illegal characters)
parsing - tokens get parsed to fit the grammar into a tree (-> syntax errors)
semantic analysis - annotated syntax tree (-> type inference issues)
generator - generate intermediate code
optimizer - clean up generated code and optimize
builder - produce a binary
*/
// main runs the two implemented compiler phases over the hard-coded
// source: tokenization (text -> token stream), then lexing (token
// classification). Each phase prints the token stream it produced.
func main() {
	tokenize(source)
	lexer()
}
var currentWord string = "" | |
func tokenize(blob string) { | |
blob = blob + "\n" | |
letters := strings.Split(blob, "") | |
inString := false | |
for _, c := range letters { | |
if c == "\n" || c == ";" { | |
// end of line or ; | |
assembleWord() | |
emit(token{END, ""}) | |
} else if c == "\"" { | |
// quotes | |
assembleWord() | |
if !inString { | |
emit(token{BEGSTRING, ""}) | |
} else { | |
emit(token{ENDSTRING, ""}) | |
} | |
inString = !inString | |
} else if c == "=" { | |
// ASSIGN | |
assembleWord() | |
emit(token{ASSIGN, ""}) | |
} else if c == " " && !inString { // space, don't emit | |
// space | |
if !inString { | |
assembleWord() | |
} | |
} else { | |
currentWord += c | |
} | |
} | |
fmt.Println(tokens) | |
} | |
func assembleWord() { | |
if currentWord == "" { | |
return | |
} | |
emit(token{WORD, currentWord}) | |
currentWord = "" | |
} | |
/* **************************************** */ | |
// library lists the built-in callable names the lexer tags as LIB.
var library = []string{"out"}

// keywords lists the language's reserved words, tagged as KEYWORD.
var keywords = []string{"if", "for", "ret"}
func lexer() { | |
for i, _ := range tokens { | |
if tokens[i].tokentype == WORD { | |
// find assignments | |
if tokens[i+1].tokentype == ASSIGN { | |
tokens[i].tokentype = VAR | |
continue | |
} | |
if tokens[i].content == "true" || tokens[i].content == "false" { | |
tokens[i].tokentype = BOOL | |
continue | |
} | |
// find lib calls | |
for _, lib := range library { | |
if tokens[i].content == lib { | |
tokens[i].tokentype = LIB | |
} | |
} | |
// find keywords defs | |
for _, keyw := range keywords { | |
if tokens[i].content == keyw { | |
tokens[i].tokentype = KEYWORD | |
} | |
} | |
} | |
// find string value | |
if tokens[i].tokentype == BEGSTRING { | |
if tokens[i+1].tokentype == WORD && tokens[i+2].tokentype == ENDSTRING { | |
tokens[i+1].tokentype = STRINGVAL | |
} | |
} | |
// remainding words should be vars being used | |
if tokens[i].tokentype == WORD { | |
tokens[i].tokentype = VAR | |
} | |
} | |
fmt.Println(tokens) | |
} | |
// emit appends t to the package-level token stream.
func emit(t token) {
	tokens = append(tokens, t)
}
// pattern is a placeholder for a grammar-driven matcher: a named token
// pattern with start/end delimiters. Not used yet.
type pattern struct {
	name, start, end, match string // TBD lexer / grammar
}

/** parsing **/
/** types of statements **/
// add later: goto and assertions

// Statement is an empty placeholder for any executable statement.
type Statement struct{}

// NOTE(review): "Assignement" is a misspelling of "Assignment"; kept
// as-is because renaming the exported type could break external users.
/**/ type Assignement struct{ Left, Right string }
/**/ type Call struct{ Callee string }
/**/ type Return struct{ Returnee string }

/** collection of statements **/
// Block is an ordered list of statements, stored as raw strings for now.
type Block struct{ Statements []string }

// Subroutine is a named, parameterized block.
type Subroutine struct {
	name string
	Block Block
	parameters []string
}

// Var names a variable; Condition and Loop wrap the block they guard.
type Var struct{ name string }
type Condition struct{ Block Block }
type Loop struct{ Block Block }
type SwitchStatement struct { // temp
	Cond string
	Cases []string
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment