Last active
December 25, 2015 06:48
-
-
Save keyle/6934293 to your computer and use it in GitHub Desktop.
humble beginnings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
) | |
// Token type tags. tokenize emits the structural tags (END, ASSIGN,
// BEGSTRING, ENDSTRING) and the generic WORD tag; lexer then refines
// WORD tokens into VAR, LIB, KEYWORD, BOOL or STRINGVAL.
const (
	END = "<END>"             // end of statement (newline or ';')
	ASSIGN = "<ASSIGN>"       // '=' between a variable and its value
	WORD = "<WORD>"           // unclassified identifier/literal text
	VAR = "<VAR>"             // variable (assignment target or reference)
	LIB = "<LIB>"             // built-in library call, see library below
	KEYWORD = "<KEYWORD>"     // reserved word, see keywords below
	BEGSTRING = "<BEGSTR>"    // opening double quote
	ENDSTRING = "<ENDSTR>"    // closing double quote
	BOOL = "<BOOL>"           // literal true / false
	STRINGVAL = "<STRINGVAL>" // word between BEGSTRING and ENDSTRING
)
// source is the hard-coded toy program being compiled: assign a string
// literal to a, then print it via the built-in "out".
var source string = `a = "hello world"
out a`

// tokens is the flat token stream produced by tokenize and
// reclassified in place by lexer.
var tokens []token
// token is one lexical unit: a type tag (one of the constants above)
// plus, for WORD tokens, the literal text it was assembled from.
type token struct {
	tokentype string // tag constant, e.g. WORD, ASSIGN, END
	content   string // raw text; empty for purely structural tokens
}
/*
lexical analysis - from a blob of text to a stream of tokens (-> illegal characters)
parsing - tokens get parsed to fit the grammar into a tree (-> syntax errors)
semantic analysis - annotated syntax tree (-> type inference issues)
generator - generate intermediate code
optimizer - clean up generated code and optimize
builder - produce a binary
*/
// main runs the two implemented compiler phases over the hard-coded
// source: tokenization (text -> token stream), then lexing (token
// classification). Each phase prints the token stream it produced.
func main() {
	tokenize(source)
	lexer()
}
var currentWord string = "" | |
func tokenize(blob string) { | |
blob = blob + "\n" | |
letters := strings.Split(blob, "") | |
inString := false | |
for _, c := range letters { | |
if c == "\n" || c == ";" { | |
// end of line or ; | |
assembleWord() | |
emit(token{END, ""}) | |
} else if c == "\"" { | |
// quotes | |
assembleWord() | |
if !inString { | |
emit(token{BEGSTRING, ""}) | |
} else { | |
emit(token{ENDSTRING, ""}) | |
} | |
inString = !inString | |
} else if c == "=" { | |
// ASSIGN | |
assembleWord() | |
emit(token{ASSIGN, ""}) | |
} else if c == " " && !inString { // space, don't emit | |
// space | |
if !inString { | |
assembleWord() | |
} | |
} else { | |
currentWord += c | |
} | |
} | |
fmt.Println(tokens) | |
} | |
func assembleWord() { | |
if currentWord == "" { | |
return | |
} | |
emit(token{WORD, currentWord}) | |
currentWord = "" | |
} | |
/* **************************************** */ | |
// library lists the built-in callable names the lexer tags as LIB.
var library = []string{"out"}

// keywords lists the language's reserved words, tagged as KEYWORD.
var keywords = []string{"if", "for", "ret"}
func lexer() { | |
for i, _ := range tokens { | |
if tokens[i].tokentype == WORD { | |
// find assignments | |
if tokens[i+1].tokentype == ASSIGN { | |
tokens[i].tokentype = VAR | |
continue | |
} | |
if tokens[i].content == "true" || tokens[i].content == "false" { | |
tokens[i].tokentype = BOOL | |
continue | |
} | |
// find lib calls | |
for _, lib := range library { | |
if tokens[i].content == lib { | |
tokens[i].tokentype = LIB | |
} | |
} | |
// find keywords defs | |
for _, keyw := range keywords { | |
if tokens[i].content == keyw { | |
tokens[i].tokentype = KEYWORD | |
} | |
} | |
} | |
// find string value | |
if tokens[i].tokentype == BEGSTRING { | |
if tokens[i+1].tokentype == WORD && tokens[i+2].tokentype == ENDSTRING { | |
tokens[i+1].tokentype = STRINGVAL | |
} | |
} | |
// remainding words should be vars being used | |
if tokens[i].tokentype == WORD { | |
tokens[i].tokentype = VAR | |
} | |
} | |
fmt.Println(tokens) | |
} | |
// emit appends t to the package-level token stream.
func emit(t token) {
	tokens = append(tokens, t)
}
// pattern is a placeholder for a grammar-driven matcher: a named token
// pattern with start/end delimiters. Not used yet.
type pattern struct {
	name, start, end, match string // TBD lexer / grammar
}

/** parsing **/
/** types of statements **/
// add later: goto and assertions

// Statement is an empty placeholder for any executable statement.
type Statement struct{}

// NOTE(review): "Assignement" is a misspelling of "Assignment"; kept
// as-is because renaming the exported type could break external users.
/**/ type Assignement struct{ Left, Right string }
/**/ type Call struct{ Callee string }
/**/ type Return struct{ Returnee string }

/** collection of statements **/
// Block is an ordered list of statements, stored as raw strings for now.
type Block struct{ Statements []string }

// Subroutine is a named, parameterized block.
type Subroutine struct {
	name string
	Block Block
	parameters []string
}

// Var names a variable; Condition and Loop wrap the block they guard.
type Var struct{ name string }
type Condition struct{ Block Block }
type Loop struct{ Block Block }
type SwitchStatement struct { // temp
	Cond string
	Cases []string
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment