Skip to content

Instantly share code, notes, and snippets.

@itsbth
Created September 28, 2018 20:35
Show Gist options
  • Save itsbth/a6018a4628a1d0f93df4842c532d7986 to your computer and use it in GitHub Desktop.
const std = @import("std");
const assert = std.debug.assert;
const mem = std.mem;
/// A single lexical token: its kind (`id`) plus the half-open byte range
/// [start, end) it covers in the tokenizer's source buffer.
pub const Token = struct {
id: Id,
start: usize,
end: usize,
/// Pairs a keyword's spelling with the token id it maps to.
pub const Keyword = struct {
bytes: []const u8,
id: Id,
};
/// Table of all recognized keywords; scanned linearly by `getKeyword`.
pub const keywords = []Keyword{
Keyword { .bytes = "var", .id = Id.Keyword_Var, },
};
/// Return the keyword id for `bytes`, or null when it is not a keyword
/// (i.e. it should be treated as a plain identifier).
fn getKeyword(bytes: []const u8) ?Id {
for (keywords) |kw| {
if (mem.eql(u8, kw.bytes, bytes)) {
return kw.id;
}
}
return null;
}
/// Every token kind the tokenizer can produce.
pub const Id = enum {
Invalid,
Eof,
Number,
Identifier,
Keyword_Var,
Semi,
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
Op_Plus,
Op_Minus,
Op_Star,
Op_Slash,
Op_Equal,
Op_EqualEqual,
Op_Less,
Op_LessEqual,
Op_Greater,
Op_GreaterEqual,
};
};
/// Hand-written DFA tokenizer over an in-memory byte buffer.
/// Tokens are byte ranges into `buffer`; call `next` repeatedly until it
/// returns a token with id `Eof`.
pub const Tokenizer = struct {
    /// Full source text; tokens reference slices of this buffer.
    buffer: []const u8,
    /// Current scan position within `buffer`.
    index: usize,

    /// Create a tokenizer positioned at the start of `buffer`.
    pub fn init(buffer: []const u8) Tokenizer {
        return Tokenizer {
            .buffer = buffer,
            .index = 0,
        };
    }

    /// Debug aid: print a token's tag and the exact source text it covers.
    pub fn dump(self: *Tokenizer, token: *const Token) void {
        std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
    }

    /// Internal scanner states for the DFA in `next`.
    const State = enum {
        Start,
        Number,
        NumberFraction,
        Identifier,
        Slash,
        LineComment,
        Equals,
        Compare,
    };

    /// Scan and return the next token. Returns an `Eof` token once the
    /// buffer is exhausted. Panics on characters outside the grammar.
    pub fn next(self: *Tokenizer) Token {
        var state = State.Start;
        var result = Token {
            .id = Token.Id.Eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                State.Start => switch (c) {
                    // Skip whitespace by sliding the token start forward.
                    ' ', '\n', '\t', '\r' => result.start += 1,
                    'a'...'z', 'A'...'Z', '_' => {
                        state = State.Identifier;
                        result.id = Token.Id.Identifier;
                    },
                    '0'...'9' => {
                        state = State.Number;
                        result.id = Token.Id.Number;
                    },
                    '+' => {
                        result.id = Token.Id.Op_Plus;
                        self.index += 1;
                        break;
                    },
                    '-' => {
                        result.id = Token.Id.Op_Minus;
                        self.index += 1;
                        break;
                    },
                    '*' => {
                        result.id = Token.Id.Op_Star;
                        self.index += 1;
                        break;
                    },
                    // '/' may begin either a division operator or a
                    // "//" line comment; disambiguated in State.Slash.
                    '/' => {
                        state = State.Slash;
                    },
                    // '=' may be assignment or "=="; see State.Equals.
                    '=' => {
                        state = State.Equals;
                    },
                    ';' => {
                        result.id = Token.Id.Semi;
                        self.index += 1;
                        break;
                    },
                    '(' => {
                        result.id = Token.Id.LParen;
                        self.index += 1;
                        break;
                    },
                    ')' => {
                        result.id = Token.Id.RParen;
                        self.index += 1;
                        break;
                    },
                    // Brace/bracket ids existed in Token.Id but the scanner
                    // previously panicked on them; emit them properly.
                    '{' => {
                        result.id = Token.Id.LBrace;
                        self.index += 1;
                        break;
                    },
                    '}' => {
                        result.id = Token.Id.RBrace;
                        self.index += 1;
                        break;
                    },
                    '[' => {
                        result.id = Token.Id.LBracket;
                        self.index += 1;
                        break;
                    },
                    ']' => {
                        result.id = Token.Id.RBracket;
                        self.index += 1;
                        break;
                    },
                    // '<'/'>' may be followed by '='; see State.Compare.
                    '<', '>' => {
                        result.id = if (c == '<') Token.Id.Op_Less else Token.Id.Op_Greater;
                        state = State.Compare;
                    },
                    else => std.debug.panic("can't handle {} at {}", c, self.index),
                },
                State.Number => switch (c) {
                    '0'...'9' => {},
                    '.' => state = State.NumberFraction,
                    else => break,
                },
                State.NumberFraction => switch (c) {
                    '0'...'9' => {},
                    else => break,
                },
                // Keyword resolution happens once, after the loop, so it
                // also covers identifiers that end exactly at EOF.
                State.Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '0'...'9', '_' => {},
                    else => break,
                },
                State.Slash => switch (c) {
                    '/' => state = State.LineComment,
                    else => {
                        result.id = Token.Id.Op_Slash;
                        break;
                    },
                },
                State.LineComment => switch (c) {
                    '\n' => {
                        // Comment consumed; restart scanning after it.
                        result.start = self.index + 1;
                        self.index += 1;
                        state = State.Start;
                    },
                    else => {},
                },
                State.Equals => switch (c) {
                    '=' => {
                        result.id = Token.Id.Op_EqualEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = Token.Id.Op_Equal;
                        break;
                    }
                },
                State.Compare => switch (c) {
                    '=' => {
                        result.id = if (result.id == Token.Id.Op_Less)
                            Token.Id.Op_LessEqual
                        else Token.Id.Op_GreaterEqual;
                        self.index += 1;
                        break;
                    },
                    else => break,
                }
            }
        }
        // Finish tokens that were still in flight when the buffer ended.
        // The original scanner mis-reported these as Eof: a trailing '/'
        // or '=' lost its operator id, and an identifier ending at EOF
        // skipped the keyword lookup (so "var" alone was never Keyword_Var).
        // The `Eof` guards keep this from clobbering ids set by a `break`.
        switch (state) {
            State.Identifier => {
                if (Token.getKeyword(self.buffer[result.start..self.index])) |id| {
                    result.id = id;
                }
            },
            State.Slash => {
                if (result.id == Token.Id.Eof) {
                    result.id = Token.Id.Op_Slash;
                }
            },
            State.Equals => {
                if (result.id == Token.Id.Eof) {
                    result.id = Token.Id.Op_Equal;
                }
            },
            State.LineComment => {
                // Comment ran to EOF: report an empty Eof token rather
                // than an Eof token spanning the comment text.
                result.start = self.index;
            },
            else => {},
        }
        result.end = self.index;
        return result;
    }
};
// Smoke test: constructing a tokenizer from a string literal must succeed.
test "create new tokenizer" {
    const tokenizer = Tokenizer.init(&"text goes here");
}
// Integer and fractional literals both tokenize as Id.Number.
test "tokenize number" {
testTokenize(&"1337 42.5", []Token.Id{ Token.Id.Number, Token.Id.Number });
}
// "var" resolves through the keyword table; "foo" stays a plain identifier.
test "tokenize identifier" {
testTokenize(&"var foo", []Token.Id{ Token.Id.Keyword_Var, Token.Id.Identifier });
}
/// Test helper: tokenize `source` and check that the emitted token ids
/// match `expected_tokens` in order, followed by exactly one Eof token.
/// Panics with a descriptive message on the first mismatch.
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
    var tokenizer = Tokenizer.init(source);
    for (expected_tokens) |expected_token_id| {
        const token = tokenizer.next();
        if (token.id != expected_token_id) {
            std.debug.panic("expected {}, found {}\n", @tagName(expected_token_id), @tagName(token.id));
        }
    }
    const last_token = tokenizer.next();
    // Use the file-level `assert` alias for consistency with the rest of
    // this module (it is declared next to the std import at the top).
    assert(last_token.id == Token.Id.Eof);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment