Skip to content

Instantly share code, notes, and snippets.

Created May 27, 2017 13:41
Show Gist options
  • Save anonymous/ecfd39c1cdeb5be41f6d7a3310fcac4d to your computer and use it in GitHub Desktop.
#import "fmt.odin";
#import "os.odin";
#import "utf8.odin";
#import "strconv.odin";
// Lexer scans a source file loaded fully into memory as raw bytes.
// All lexemes produced by get_token are slices into `file`, so the
// buffer must outlive every Token built from it.
Lexer :: struct {
file: []byte, // entire source file, filled by init_lexer
offset: int, // byte index of the next rune to decode
last_rune_size: int, // byte length of the last decoded rune; consumed by back()
}
// TokenType enumerates every kind of token get_token can produce.
TokenType :: enum {
// Single-character punctuation and operators.
Comma,
Colon,
SemiColon,
Dot,
Equal,
Plus,
Minus,
Slash,
Asterisk,
// Multi-character tokens; the lexeme carries the matched text.
Ident,
Number,
String,
// Keywords.
True,
False,
Func,
Struct,
// EOF is returned once the input is exhausted; Unknown for any
// rune the lexer does not recognize.
EOF,
Unknown,
}
// NumberType classifies the numeric payload of a Number token.
// NOTE(review): "UsignedInteger" is a typo for "UnsignedInteger";
// left as-is here because renaming an enum member would break any
// code elsewhere that references it -- fix in a coordinated change.
NumberType :: enum {
UsignedInteger,
SignedInteger,
Float,
}
// A single lexed token: its classification plus the exact source text.
// The lexeme is a slice into the Lexer's file buffer (or a static
// placeholder such as "(EOF)" for synthetic tokens).
//
// NOTE(review): the previous union-based definition did not match any
// call site -- get_token constructs tokens as Token{TokenType.X, lexeme}
// and main reads t.type -- so the struct form that all usage requires
// is restored here. The union's Number payload (NumberType/u64/i64/f64)
// was never populated anywhere in the file.
Token :: struct {
	type: TokenType,
	lexeme: string,
}
// Reads the whole file at `path` into the lexer and resets the scan
// position to the start. On any read failure the error is reported on
// stdout and the entire process exits with status 1.
init_lexer :: proc(using lexer: ^Lexer, path: string) {
	data, ok := os.read_entire_file(path);
	if !ok {
		fmt.printf("Failed to open file '%s'\n", path);
		os.exit(1);
	}
	file = data;
	offset = 0;
}
// Reports whether r is a blank character the lexer should skip over:
// space, tab, carriage return, or newline.
is_whitespace :: proc(r: rune) -> bool {
	match r {
	case ' ': { return true; }
	case '\t': { return true; }
	case '\r': { return true; }
	case '\n': { return true; }
	}
	return false;
}
// Advances the lexer past any run of whitespace. The first
// non-whitespace rune is pushed back so the caller sees it next;
// at end of input (RUNE_ERROR) nothing is pushed back.
// TODO(thebirk): Handle linenumbers and chars
eat_whitespace :: proc(using lexer: ^Lexer) {
	for {
		r := get_rune(lexer);
		if r == utf8.RUNE_ERROR {
			return;
		}
		if !is_whitespace(r) {
			back(lexer);
			return;
		}
	}
}
// Pushes the most recently decoded rune back onto the input by
// rewinding `offset` by its byte length. Only a single step of
// push-back is supported: last_rune_size is zeroed afterwards, so a
// second consecutive back() is a no-op rather than a further rewind.
back :: proc(using lexer: ^Lexer) {
//assert(last_rune_size != 0);
offset -= last_rune_size;
last_rune_size = 0;
}
// Decodes and consumes the next UTF-8 rune starting at `offset`,
// recording its byte length so back() can undo exactly one read.
// NOTE(review): when the buffer is exhausted this relies on
// utf8.decode_rune returning RUNE_ERROR for an empty slice -- callers
// throughout this file treat RUNE_ERROR as end-of-input. Confirm the
// reported size at EOF is 0 so `offset` does not creep past len(file).
get_rune :: proc(using lexer: ^Lexer) -> rune {
r, i := utf8.decode_rune(file[offset..]);
offset += i;
last_rune_size = i;
return r;
}
// Reports whether r is an ASCII letter, either case.
is_alpha :: proc(r: rune) -> bool {
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z');
}
// Reports whether r is an ASCII decimal digit.
is_num :: proc(r: rune) -> bool {
	return '0' <= r && r <= '9';
}
// Reports whether r is an ASCII letter or decimal digit.
is_alnum :: proc(r: rune) -> bool {
	if is_alpha(r) {
		return true;
	}
	return is_num(r);
}
// Reports whether r is a hexadecimal digit: 0-9, a-f, or A-F.
is_hex :: proc(r: rune) -> bool {
	if is_num(r) {
		return true;
	}
	return ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F');
}
// Reports whether r is a binary digit ('0' or '1').
is_binary :: proc(r: rune) -> bool {
	match r {
	case '0': { return true; }
	case '1': { return true; }
	}
	return false;
}
// get_token scans and returns the next token from the input.
// Leading whitespace is skipped and "//" line comments are consumed
// transparently. String, number, and identifier lexemes are slices
// into the lexer's file buffer; punctuation and synthetic tokens use
// static lexemes. Returns an EOF token once the input is exhausted,
// and Unknown for any rune the lexer does not recognize.
get_token :: proc(using lexer: ^Lexer) -> Token {
	eat_whitespace(lexer);

	r := get_rune(lexer);
	if r == utf8.RUNE_ERROR {
		return Token{TokenType.EOF, "(EOF)"};
	}

	// Single-character punctuation, strings, and comments.
	match r {
	case ',': {
		return Token{TokenType.Comma, ","};
	}
	case ':': {
		return Token{TokenType.Colon, ":"};
	}
	case ';': {
		return Token{TokenType.SemiColon, ";"};
	}
	case '.': {
		return Token{TokenType.Dot, "."};
	}
	case '=': {
		return Token{TokenType.Equal, "="};
	}
	case '+': {
		return Token{TokenType.Plus, "+"};
	}
	case '-': {
		return Token{TokenType.Minus, "-"};
	}
	case '*': {
		return Token{TokenType.Asterisk, "*"};
	}
	case '"': {
		// String literal: the lexeme is the text between the quotes.
		// NOTE(review): an unterminated string (EOF before the closing
		// quote) still yields a String token -- confirm that is intended.
		start := offset;
		r = get_rune(lexer);
		for r != utf8.RUNE_ERROR && r != '"' {
			r = get_rune(lexer);
		}
		str := string(file[start..offset-2]);
		return Token{TokenType.String, str};
	}
	case '/': {
		r = get_rune(lexer);
		if r != '/' {
			// A lone '/' is the division operator.
			back(lexer);
			return Token{TokenType.Slash, "/"};
		} else {
			// "//" line comment: skip to end of line, then lex again.
			for r != utf8.RUNE_ERROR && r != '\n' {
				r = get_rune(lexer);
			}
			back(lexer);
			return get_token(lexer);
		}
	}
	}

	if is_num(r) {
		start := offset-1;
		old_r := r;
		r = get_rune(lexer);
		// A leading '0' may introduce a hexadecimal or binary literal.
		if old_r == '0' {
			match r {
			case 'x': {
				start = offset; // lexeme excludes the "0x" prefix
				r = get_rune(lexer);
				for r != utf8.RUNE_ERROR && is_hex(r) {
					r = get_rune(lexer);
				}
				back(lexer);
				num := string(file[start..offset-1]);
				return Token{TokenType.Number, num};
			}
			case 'b': {
				start = offset; // lexeme excludes the "0b" prefix
				r = get_rune(lexer);
				for r != utf8.RUNE_ERROR && is_binary(r) {
					r = get_rune(lexer);
				}
				back(lexer);
				num := string(file[start..offset-1]);
				return Token{TokenType.Number, num};
			}
			}
		}
		// Plain decimal integer.
		for r != utf8.RUNE_ERROR && is_num(r) {
			r = get_rune(lexer);
		}
		back(lexer);
		num := string(file[start..offset-1]);
		return Token{TokenType.Number, num};
	}

	if is_alpha(r) {
		start := offset-1;
		r = get_rune(lexer);
		for r != utf8.RUNE_ERROR && (is_alnum(r) || r == '_') {
			r = get_rune(lexer);
		}
		back(lexer);
		ident := string(file[start..offset-1]);
		// Keywords get their dedicated token types; anything else is a
		// plain identifier. (Previously only "true" was recognized, and
		// it merely printed a debug line while still returning Ident.)
		match ident {
		case "true": {
			return Token{TokenType.True, ident};
		}
		case "false": {
			return Token{TokenType.False, ident};
		}
		case "func": {
			return Token{TokenType.Func, ident};
		}
		case "struct": {
			return Token{TokenType.Struct, ident};
		}
		}
		return Token{TokenType.Ident, ident};
	}

	return Token{TokenType.Unknown, "(Unknown)"};
}
#import "fmt.odin";
#load "lexer.odin";
// Lexes "test.b" and prints every token it contains, one per line.
// The terminating EOF token is not printed.
main :: proc() {
	lexer := new(Lexer);
	init_lexer(lexer, "test.b");
	for tok := get_token(lexer); tok.type != TokenType.EOF; tok = get_token(lexer) {
		fmt.println(tok);
	}
}
something // comment 123 blah
// comment
123
0xFAF
0b1010
{}
()
[]
,
.
-+/*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment