@flashburns
Created April 15, 2025 03:24
I'm currently writing a tokenizer and parser as the start of a scripting language. I'm trying to keep it KISS: the runtime is going to be a simple tree-walking interpreter, and for the GC I'll piggyback on the D runtime's GC. The goal is to have a simple base to experiment on while figuring out language specifics. For the base I'm following the structure of the interpreter from the "Writing An Interpreter In Go" book, but this time in D, and with changes aimed at my end goal rather than doing it purely for learning. (It's a good book, I recommend it.)

module script;
@safe:

import std.stdio : writeln;
import std.conv : text, to;

// -------------------------- TOKENIZER --------------------------

struct Token {
    enum Type {
        INVALID,
        START,
        END,
        Identifier,    /// foobar, x, y, z
        IntLiteral,    /// 1234
        Assign,        /// =
        Plus,          /// +
        Minus,         /// -
        Bang,          /// !
        Asterisk,      /// *
        Slash,         /// /
        LessThan,      /// <
        GreaterThan,   /// >
        Equal,         /// ==
        NotEqual,      /// !=
        Comma,         /// ,
        SemiColon,     /// ;
        L_Parenthesis, /// (
        R_Parenthesis, /// )
        L_Brace,       /// {
        R_Brace,       /// }
        // keywords
        Function,      /// function
        Var,           /// var
        True,          /// true
        False,         /// false
        If,            /// if
        Else,          /// else
        Return,        /// return
    }
    Type type;
    string literal;
}
// This shouldn't be too hard to make @nogc.
struct Lexer {
    string src;
    Token current_token = Token(Token.Type.START);

    Token front() {
        if(this.current_token.type == Token.Type.START)
            this.current_token = this.consume_token(this.src);
        return current_token;
    }
    void popFront() {
        this.current_token = this.consume_token(this.src);
    }
    bool empty() const { return this.current_token.type == Token.Type.END; }

    bool consume(uint count = 1) {
        while (count-- > 0) {
            if(this.empty) return false;
            this.popFront();
        }
        return true;
    }
    Token peek(uint count = 1) const {
        Lexer copy = this;
        if (copy.consume(count)) {
            return copy.front;
        } else {
            return Token();
        }
    }

    private static Token consume_token(ref string src) {
        if(src.length == 0) return Token(Token.Type.END);
        import std.uni : isAlpha, isAlphaNum, isNumber;
        char current_char = src[0];

        void consume(ulong eat = 1) {
            if (src.length > eat) {
                src = src[eat..$];
                current_char = src[0];
            } else {
                src = null;
                current_char = '\0';
            }
        }
        void consume_whitespace() {
            while(current_char == ' ' || current_char == '\r' || current_char == '\n' || current_char == '\t')
                consume();
        }
        bool consume_keyword(string key_word) {
            import std.string : startsWith;
            // A keyword must not just be the prefix of a longer identifier (e.g. `iffy` is not `if` + `fy`).
            if(src.startsWith(key_word) && (src.length == key_word.length || !src[key_word.length].isAlphaNum)){
                consume(key_word.length);
                return true;
            }
            return false;
        }
        bool check_next_char(char c, uint offset = 1) {
            return src.length > offset && src[offset] == c;
        }
        Token consume_identifier() {
            ulong len;
            foreach(ident_char; src){
                if(ident_char.isAlphaNum == false) break;
                len++;
            }
            scope(exit) consume(len);
            return Token(Token.Type.Identifier, src[0..len]);
        }
        Token consume_numeric_literal() {
            ulong len;
            foreach(ident_char; src){
                if(ident_char.isNumber == false) break;
                len++;
            }
            scope(exit) consume(len);
            return Token(Token.Type.IntLiteral, src[0..len]);
        }

        consume_whitespace();
        if(src.length == 0) return Token(Token.Type.END);

        switch(current_char){
            case '\0': return Token();
            case '=':
                if (check_next_char('=')) {
                    consume(2);
                    return Token(Token.Type.Equal);
                } else {
                    consume();
                    return Token(Token.Type.Assign);
                }
            case '+': consume(); return Token(Token.Type.Plus);
            case '-': consume(); return Token(Token.Type.Minus);
            case '!':
                if (check_next_char('=')) {
                    consume(2);
                    return Token(Token.Type.NotEqual);
                } else {
                    consume();
                    return Token(Token.Type.Bang);
                }
            case '*': consume(); return Token(Token.Type.Asterisk);
            case '/': consume(); return Token(Token.Type.Slash);
            case '<': consume(); return Token(Token.Type.LessThan);
            case '>': consume(); return Token(Token.Type.GreaterThan);
            case ',': consume(); return Token(Token.Type.Comma);
            case ';': consume(); return Token(Token.Type.SemiColon);
            case '(': consume(); return Token(Token.Type.L_Parenthesis);
            case ')': consume(); return Token(Token.Type.R_Parenthesis);
            case '{': consume(); return Token(Token.Type.L_Brace);
            case '}': consume(); return Token(Token.Type.R_Brace);
            default:
                if (consume_keyword("if")) {
                    return Token(Token.Type.If);
                } else if (consume_keyword("else")) {
                    return Token(Token.Type.Else);
                } else if (consume_keyword("return")) {
                    return Token(Token.Type.Return);
                } else if (consume_keyword("true")) {
                    return Token(Token.Type.True);
                } else if (consume_keyword("false")) {
                    return Token(Token.Type.False);
                } else if (consume_keyword("var")) {
                    return Token(Token.Type.Var);
                } else if (consume_keyword("function")) {
                    return Token(Token.Type.Function);
                } else if (current_char.isNumber) {
                    return consume_numeric_literal();
                } else if (current_char.isAlpha) {
                    // is identifier
                    return consume_identifier();
                } else {
                    writeln(i"GOT `$(current_char)`, char code: $(current_char.to!int)".text);
                    // INVALID
                    consume();
                    return Token();
                }
        }
    }
}
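
// The Lexer is consumed as an input range (front/popFront/empty), and peek just copies the
// whole struct, which stays cheap since `src` is only a slice. A quick sanity check of the
// interface (my own example input, not from the tests below):
unittest {
    auto lexer = Lexer(src: "var x = 1;");
    assert(lexer.front == Token(Token.Type.Var));
    assert(lexer.peek == Token(Token.Type.Identifier, "x"));
    assert(lexer.peek(2) == Token(Token.Type.Assign));
    lexer.consume(4);
    assert(lexer.front == Token(Token.Type.SemiColon));
}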
// -------------------------- PARSER --------------------------
static string out_of_tokens = "Ran out of tokens when parsing.";
import std.exception : enforce;
class Statement {
    static Statement parse(ref ParserMeta meta, ref Lexer lexer) {
        if (lexer.front == Token(Token.Type.Var)) {
            enforce(lexer.consume(), out_of_tokens);
            auto statement = VarStatement.parse(meta, lexer);
            enforce(lexer.front == Token(Token.Type.SemiColon), i"Expected SemiColon to end statement, got: $(lexer.front)".text);
            enforce(lexer.consume(), out_of_tokens);
            return statement;
        } else if (lexer.front == Token(Token.Type.Return)) {
            enforce(lexer.consume(), out_of_tokens);
            auto statement = Return.parse(meta, lexer);
            enforce(lexer.front == Token(Token.Type.SemiColon), i"Expected SemiColon to end statement, got: $(lexer.front)".text);
            enforce(lexer.consume(), out_of_tokens);
            return statement;
        } else {
            auto statement = Expression.parse(meta, lexer);
            enforce(lexer.front == Token(Token.Type.SemiColon), i"Expected SemiColon to end statement, got: $(lexer.front)".text);
            enforce(lexer.consume(), out_of_tokens);
            return statement;
        }
        //throw new Exception("Invalid Statement.");
    }
    override string toString () const {
        // Only the subclasses are ever instantiated; they all provide their own toString.
        assert(0, "toString called on the Statement base class.");
    }
}
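
// A small sanity check for the statement dispatcher above; this is just my assumption of
// the intended usage (set up a ParserMeta, then feed it a lexer one statement at a time).
unittest {
    ParserMeta meta;
    meta.setup_default();
    auto lexer = Lexer(src: "return 5 + 5;");
    assert(Statement.parse(meta, lexer).toString == "return (5 Plus 5)");
    assert(lexer.empty); // the trailing semicolon is consumed as part of the statement
}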
class Return : Statement {
    Expression value;
    this(Expression value) {
        this.value = value;
    }
    static Return parse(ref ParserMeta meta, ref Lexer lexer) {
        return new Return( Expression.parse(meta, lexer) );
    }
    override string toString () const {
        return i"return $(this.value)".text;
    }
}
class VarStatement : Statement {
    string name;
    Expression value;
    this(string name, Expression value){
        this.name = name;
        this.value = value;
    }
    override string toString () const {
        return i"var $(this.name) = $(this.value)".text;
    }
    static VarStatement parse(ref ParserMeta meta, ref Lexer lexer) {
        enforce(lexer.front.type == Token.Type.Identifier, i"Expected Identifier, got $(lexer.front).".text);
        enforce(lexer.peek == Token(Token.Type.Assign), i"Expected Assignment, got $(lexer.peek).".text);
        string name_literal = lexer.front.literal;
        enforce(lexer.consume(2), out_of_tokens);
        Expression value = Expression.parse(meta, lexer);
        return new VarStatement(name_literal, value);
    }
}
class Identifier : Expression {
    string name;
    this(string name) {
        this.name = name;
    }
    static Identifier prefix_parser(ref ParserMeta meta, ref Lexer lexer) {
        auto ident = new Identifier(lexer.front.literal);
        lexer.consume();
        return ident;
    }
    override string toString () const {
        return this.name;
    }
}
class BoolLiteral : Expression {
    bool value;
    this(bool value) {
        this.value = value;
    }
    static BoolLiteral prefix_parser(ref ParserMeta meta, ref Lexer lexer) {
        auto literal = new BoolLiteral(lexer.front.type == Token.Type.True);
        lexer.consume();
        return literal;
    }
    override string toString () const {
        return this.value.to!string;
    }
}
class IntLiteral : Expression {
    int value;
    this(string value) {
        this.value = value.to!int;
    }
    static IntLiteral prefix_parser(ref ParserMeta meta, ref Lexer lexer) {
        auto literal = new IntLiteral(lexer.front.literal);
        lexer.consume();
        return literal;
    }
    override string toString () const {
        return this.value.to!string;
    }
}
class Expression : Statement {
    static Expression parse(ref ParserMeta meta, ref Lexer lexer, ParserPrecedences precedence = ParserPrecedences.LOWEST) {
        assert(lexer.front.type != Token.Type.SemiColon);
        ParserMeta.PrefixParser prefix_parser = meta.prefix.get(lexer.front.type, null);
        enforce(prefix_parser, i"No prefix parser for $(lexer.front.type).".text);
        Expression left = prefix_parser(meta, lexer);
        // Infix parsing (the binary operator loop).
        while(true){
            // If the next token is a semicolon, we are at the end of the expression and there is no right-hand side for an infix parser to parse.
            if( lexer.peek.type == Token.Type.SemiColon ) break;
            // Only keep going while the precedence (of the left expression) is less than the precedence of the current operator/token.
            if( precedence >= meta.get_infix_precedence(lexer.front.type) ) break;
            // We have enough precedence and are not at the end of the expression; look up the binary operator parser for this token.
            auto infix = meta.infix.get(lexer.front.type, null);
            // If no binary operator parser is found (infix is null), exit the infix loop.
            if(infix == null) break;
            left = infix(meta, lexer, left);
        }
        return left;
    }
    override string toString () const {
        // Only the subclasses are ever instantiated; they all provide their own toString.
        assert(0, "toString called on the Expression base class.");
    }
}
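
// A minimal check of the precedence climbing above: `*` binds tighter than `+`, so the
// right-hand side of the `+` ends up being the nested `(2 Asterisk 3)` expression.
unittest {
    ParserMeta meta;
    meta.setup_default();
    auto lexer = Lexer(src: "1 + 2 * 3;");
    assert(Expression.parse(meta, lexer).toString == "(1 Plus (2 Asterisk 3))");
}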
Expression grouped_expression_prefix_parser(ref ParserMeta meta, ref Lexer lexer) {
    assert(lexer.front.type == Token.Type.L_Parenthesis);
    lexer.consume();
    Expression expression = Expression.parse(meta, lexer);
    enforce(lexer.front.type == Token.Type.R_Parenthesis, i"Expected R_Parenthesis, got $(lexer.front).".text);
    lexer.consume();
    return expression;
}
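
// Parentheses restart precedence at LOWEST inside the group, so `(1 + 2) * 3` keeps the
// addition together instead of letting `*` grab the `2`:
unittest {
    ParserMeta meta;
    meta.setup_default();
    auto lexer = Lexer(src: "(1 + 2) * 3;");
    assert(Expression.parse(meta, lexer).toString == "((1 Plus 2) Asterisk 3)");
}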
class PrefixExpression : Expression {
    string operator;
    Expression right;
    this(string operator, Expression right) {
        this.operator = operator;
        this.right = right;
    }
    static PrefixExpression prefix_parser(ref ParserMeta meta, ref Lexer lexer) {
        string operator = lexer.front.type.to!string;
        lexer.consume();
        Expression right = Expression.parse(meta, lexer, /*meta.get_prefix_precedence(lexer.front.type)*/ ParserPrecedences.PREFIX);
        return new PrefixExpression(operator, right);
    }
    override string toString () const {
        // TODO: make the operator map to the correct source code.
        return i`($(this.operator) $(this.right))`.text;
    }
}
class InfixExpression : Expression {
    string operator;
    Expression left;
    Expression right;
    this(string operator, Expression left, Expression right) {
        this.operator = operator;
        this.left = left;
        this.right = right;
    }
    override string toString () const {
        // TODO: make the operator map to the correct source code.
        return i`($(this.left) $(this.operator) $(this.right))`.text;
    }
    static InfixExpression infix_parser(ref ParserMeta meta, ref Lexer lexer, Expression left) {
        //writeln("infix_parser: ", lexer.front);
        string operator = lexer.front.type.to!string;
        enforce(lexer.front.type in meta.infix, i"Invalid infix, got: $(lexer.front).".text);
        ParserPrecedences precedence = meta.get_infix_precedence(lexer.front.type);
        lexer.consume();
        assert(lexer.front.type != Token.Type.SemiColon, "expression ended early (ran into a semicolon)");
        Expression right = Expression.parse(meta, lexer, precedence);
        return new InfixExpression(operator, left, right);
    }
}
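
/// One possible way to tackle the toString TODOs above: map operator token types back to
/// their source spelling. Only a sketch, not wired in yet; the tests below assert on the
/// enum names ("Plus", "Bang", ...), so switching toString over would break them.
string operator_lexeme(Token.Type type) {
    switch (type) {
        case Token.Type.Plus:        return "+";
        case Token.Type.Minus:       return "-";
        case Token.Type.Bang:        return "!";
        case Token.Type.Asterisk:    return "*";
        case Token.Type.Slash:       return "/";
        case Token.Type.LessThan:    return "<";
        case Token.Type.GreaterThan: return ">";
        case Token.Type.Equal:       return "==";
        case Token.Type.NotEqual:    return "!=";
        default:                     return type.to!string;
    }
}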
enum ParserPrecedences {
    LOWEST,
    EQUALS,      // == or !=
    LESSGREATER, // > or <
    SUM,         // + or -
    PRODUCT,     // / or *
    PREFIX,      // -X or !X
    CALL,        // cool_func(X)
}
struct ParserMeta {
    alias PrefixParser = Expression function(ref ParserMeta meta, ref Lexer lexer) @safe;
    alias InfixParser = Expression function(ref ParserMeta meta, ref Lexer lexer, Expression left) @safe;
    PrefixParser[Token.Type] prefix;
    InfixParser[Token.Type] infix;
    ParserPrecedences[Token.Type] infix_precedences;
    //ParserPrecedences[Token.Type] prefix_precedences;
    ParserPrecedences get_infix_precedence(Token.Type token_type) { return this.infix_precedences.get(token_type, ParserPrecedences.LOWEST); }
    //ParserPrecedences get_prefix_precedence(Token.Type token_type) { return this.prefix_precedences.get(token_type, ParserPrecedences.LOWEST); }

    void setup_default() {
        this.prefix[Token.Type.Bang] = &PrefixExpression.prefix_parser;
        this.prefix[Token.Type.Minus] = &PrefixExpression.prefix_parser;
        //this.prefix_precedences[Token.Type.Bang] = ParserPrecedences.PREFIX;
        //this.prefix_precedences[Token.Type.Minus] = ParserPrecedences.PREFIX;
        this.prefix[Token.Type.Identifier] = &Identifier.prefix_parser;
        this.prefix[Token.Type.IntLiteral] = &IntLiteral.prefix_parser;
        this.prefix[Token.Type.True] = &BoolLiteral.prefix_parser;
        this.prefix[Token.Type.False] = &BoolLiteral.prefix_parser;
        this.prefix[Token.Type.L_Parenthesis] = &grouped_expression_prefix_parser;

        this.infix_precedences[Token.Type.Equal] = ParserPrecedences.EQUALS;
        this.infix[Token.Type.Equal] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.NotEqual] = ParserPrecedences.EQUALS;
        this.infix[Token.Type.NotEqual] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.LessThan] = ParserPrecedences.LESSGREATER;
        this.infix[Token.Type.LessThan] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.GreaterThan] = ParserPrecedences.LESSGREATER;
        this.infix[Token.Type.GreaterThan] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.Plus] = ParserPrecedences.SUM;
        this.infix[Token.Type.Plus] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.Minus] = ParserPrecedences.SUM;
        this.infix[Token.Type.Minus] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.Slash] = ParserPrecedences.PRODUCT;
        this.infix[Token.Type.Slash] = &InfixExpression.infix_parser;
        this.infix_precedences[Token.Type.Asterisk] = ParserPrecedences.PRODUCT;
        this.infix[Token.Type.Asterisk] = &InfixExpression.infix_parser;
    }
}
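
// The parser tables make the grammar data-driven: precedence comes from a lookup with a
// LOWEST fallback for tokens that are not binary operators. A quick check of that behavior:
unittest {
    ParserMeta meta;
    meta.setup_default();
    assert(meta.get_infix_precedence(Token.Type.Asterisk) > meta.get_infix_precedence(Token.Type.Plus));
    assert(meta.get_infix_precedence(Token.Type.SemiColon) == ParserPrecedences.LOWEST);
}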
Statement[] parse(Lexer lexer) {
    ParserMeta meta;
    meta.setup_default();
    Statement[] program_statements;
    while (lexer.empty == false) {
        program_statements ~= Statement.parse(meta, lexer);
    }
    return program_statements;
}
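
// End-to-end sketch of the pipeline so far: source text in, statement list out.
unittest {
    import std.algorithm : equal, map;
    auto statements = parse(Lexer(src: "var x = 1 + 2;"));
    assert(statements.map!(s => s.toString).equal(["var x = (1 Plus 2)"]));
}

// Where this is headed: a tree-walking evaluator over the AST classes above. This is only
// a sketch of the idea under big assumptions (a made-up `eval_sketch` name, an int-only
// result, no environment/scopes, Bang mapped onto 0/1); the real evaluator doesn't exist yet.
long eval_sketch(const Statement statement) {
    if (auto literal = cast(const IntLiteral) statement) return literal.value;
    if (auto prefix = cast(const PrefixExpression) statement) {
        const right = eval_sketch(prefix.right);
        // Operators are currently stored as the token type names ("Minus", "Bang", ...).
        if (prefix.operator == "Minus") return -right;
        if (prefix.operator == "Bang") return right == 0 ? 1 : 0;
    }
    if (auto infix = cast(const InfixExpression) statement) {
        const l = eval_sketch(infix.left);
        const r = eval_sketch(infix.right);
        switch (infix.operator) {
            case "Plus":     return l + r;
            case "Minus":    return l - r;
            case "Asterisk": return l * r;
            case "Slash":    return l / r; // integer division for now
            default: break;
        }
    }
    assert(0, "node not handled by this sketch");
}

unittest {
    ParserMeta meta;
    meta.setup_default();
    auto lexer = Lexer(src: "5 + 6 * -2;");
    assert(eval_sketch(Expression.parse(meta, lexer)) == -7);
}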
#!/usr/bin/env -S pal shebang rdmd -g -i -I={{PAL_PROJECT_DIR}}/enigma/src -I={{PAL_PROJECT_DIR}}/playground/WritingAnInterpreterInGoBook/src
@safe:
import enigma.core;
import std.stdio : writeln;
import std.conv : text;

void main() {
    writeln("start...");
    test();
    writeln("end...");
}
// Debug helper: prints each token until one doesn't match the expected list.
void print_by_token(T)(T expected, string src) {
    import script : Lexer, Token;
    uint i;
    foreach (got; Lexer(src: src)) {
        if (got == expected[i]) {
            writeln(got);
        } else {
            writeln(i"expect: $(expected[i]) vs $(got)".text);
            break;
        }
        i++;
    }
}
void test() {
    import script : Lexer, Token, parse;
    import std.algorithm : equal, each, map;
    import std.array : array;

    // TEST 1
    //Lexer(src: `=+(){},;`).array.writeln;
    (){
        enum expected = [
            Token(Token.Type.Assign),
            Token(Token.Type.Plus),
            Token(Token.Type.L_Parenthesis),
            Token(Token.Type.R_Parenthesis),
            Token(Token.Type.L_Brace),
            Token(Token.Type.R_Brace),
            Token(Token.Type.Comma),
            Token(Token.Type.SemiColon),
        ];
        assert(Lexer(src: `=+(){},;`).equal(expected));
        // Test that this works at compile time too.
        static assert(Lexer(src: `=+(){},;`).equal(expected));
    }();
    // TEST 2
    (){
        enum src = `
            var five = 5;
            var ten = 10;
            var add = function(x, y) {
                x + y;
            };
            var result = add(five, ten);
        `.dedent;
        enum expected = [
            // var five = 5;
            Token(Token.Type.Var), Token(Token.Type.Identifier, "five"), Token(Token.Type.Assign), Token(Token.Type.IntLiteral, "5"), Token(Token.Type.SemiColon),
            // var ten = 10;
            Token(Token.Type.Var), Token(Token.Type.Identifier, "ten"), Token(Token.Type.Assign), Token(Token.Type.IntLiteral, "10"), Token(Token.Type.SemiColon),
            // var add = function(x, y) {
            Token(Token.Type.Var), Token(Token.Type.Identifier, "add"), Token(Token.Type.Assign), Token(Token.Type.Function), Token(Token.Type.L_Parenthesis), Token(Token.Type.Identifier, "x"), Token(Token.Type.Comma), Token(Token.Type.Identifier, "y"), Token(Token.Type.R_Parenthesis), Token(Token.Type.L_Brace),
            // x + y;
            Token(Token.Type.Identifier, "x"), Token(Token.Type.Plus), Token(Token.Type.Identifier, "y"), Token(Token.Type.SemiColon),
            // };
            Token(Token.Type.R_Brace), Token(Token.Type.SemiColon),
            // var result = add(five, ten);
            Token(Token.Type.Var), Token(Token.Type.Identifier, "result"), Token(Token.Type.Assign), Token(Token.Type.Identifier, "add"), Token(Token.Type.L_Parenthesis), Token(Token.Type.Identifier, "five"), Token(Token.Type.Comma), Token(Token.Type.Identifier, "ten"), Token(Token.Type.R_Parenthesis), Token(Token.Type.SemiColon),
        ];
        //expected.print_by_token(src);
        assert(Lexer(src: src).equal(expected));
        static assert(Lexer(src: src).equal(expected));
    }();
    // TEST 3
    (){
        enum src = `
            !-/*5;
            5 < 10 > 5;
            if (5 < 10) {
                return true;
            } else {
                return false;
            }
            10 == 10;
            10 != 9;
        `.dedent;
        enum expected = [
            // !-/*5;
            Token(Token.Type.Bang), Token(Token.Type.Minus), Token(Token.Type.Slash), Token(Token.Type.Asterisk), Token(Token.Type.IntLiteral, "5"), Token(Token.Type.SemiColon),
            // 5 < 10 > 5;
            Token(Token.Type.IntLiteral, "5"), Token(Token.Type.LessThan), Token(Token.Type.IntLiteral, "10"), Token(Token.Type.GreaterThan), Token(Token.Type.IntLiteral, "5"), Token(Token.Type.SemiColon),
            // if (5 < 10) {
            Token(Token.Type.If), Token(Token.Type.L_Parenthesis), Token(Token.Type.IntLiteral, "5"), Token(Token.Type.LessThan), Token(Token.Type.IntLiteral, "10"), Token(Token.Type.R_Parenthesis), Token(Token.Type.L_Brace),
            // return true;
            Token(Token.Type.Return), Token(Token.Type.True), Token(Token.Type.SemiColon),
            // } else {
            Token(Token.Type.R_Brace), Token(Token.Type.Else), Token(Token.Type.L_Brace),
            // return false;
            Token(Token.Type.Return), Token(Token.Type.False), Token(Token.Type.SemiColon),
            // }
            Token(Token.Type.R_Brace),
            // 10 == 10;
            Token(Token.Type.IntLiteral, "10"), Token(Token.Type.Equal), Token(Token.Type.IntLiteral, "10"), Token(Token.Type.SemiColon),
            // 10 != 9;
            Token(Token.Type.IntLiteral, "10"), Token(Token.Type.NotEqual), Token(Token.Type.IntLiteral, "9"), Token(Token.Type.SemiColon),
        ];
        //expected.print_by_token(src);
        assert(Lexer(src: src).equal(expected));
        static assert(Lexer(src: src).equal(expected));
    }();
    // TEST 4
    (){
        //parse(Lexer(src: `var foobar=joe;`)).each!writeln;
        assert( parse(Lexer(src: `var foobar=joe;`)).map!(s => s.toString).equal([`var foobar = joe`]) );
    }();
    //writeln("--------------------");

    // TEST 5
    (){
        //parse(Lexer(src: `! 5;`)).each!writeln;
        assert( parse(Lexer(src: `! 5;`)).map!(s => s.toString).equal([`(Bang 5)`]) );
    }();
    //writeln("--------------------");

    // TEST 6
    (){
        //parse(Lexer(src: `var x = 5 + 6 * -2;`)).map!(s => s.toString).each!writeln;
        assert( parse(Lexer(src: `var x = 5 + 6 * -2;`)).map!(s => s.toString).equal([`var x = (5 Plus (6 Asterisk (Minus 2)))`]) );
    }();
    //writeln("--------------------");
    // TEST 7
    (){
        //parse(Lexer(src: `a + b * c + d / e - f;3 + 4 * 5 == 3 * 1 + 4 * 5;true == false == false;!-a;!false == !true != true;4+2*2;!4+2*2;!(4+2)*2;`)).map!(s => s.toString).each!writeln;
        scope parsed_statements = parse(Lexer(src: `
            a + b * c + d / e - f;
            3 + 4 * 5 == 3 * 1 + 4 * 5;
            !-a;
            true == false == false;
            !false == !true != true;
            4+2*2;!4+2*2;!(4+2)*2;
        `.dedent));
        static string[] expected_parsed_statements = [
            `(((a Plus (b Asterisk c)) Plus (d Slash e)) Minus f)`,
            `((3 Plus (4 Asterisk 5)) Equal ((3 Asterisk 1) Plus (4 Asterisk 5)))`,
            `(Bang (Minus a))`,
            `((true Equal false) Equal false)`,
            `(((Bang false) Equal (Bang true)) NotEqual true)`,
            `(4 Plus (2 Asterisk 2))`,
            `((Bang 4) Plus (2 Asterisk 2))`,
            `((Bang (4 Plus 2)) Asterisk 2)`,
        ];
        uint index;
        foreach(statement; parsed_statements) {
            string parsed_statement = statement.toString;
            assert(parsed_statement == expected_parsed_statements[index], i"got `$(parsed_statement)` vs expected `$(expected_parsed_statements[index])`.".text);
            index += 1;
        }
    }();
    // TEST 8
    (){
    }();
}