Skip to content

Instantly share code, notes, and snippets.

@Chubek
Created August 21, 2024 13:50
Show Gist options
  • Save Chubek/c4f23698d883bc2968f64bbf4174e6b7 to your computer and use it in GitHub Desktop.
Save Chubek/c4f23698d883bc2968f64bbf4174e6b7 to your computer and use it in GitHub Desktop.
Ramkal, a parser for ISO Pascal (in D)

ramkal.d contains a roughly working parser for the ISO variant of the Pascal language. It was mainly an experiment, and it is no longer maintained.

You can look at the source for influence, and ideas.

Note: Several additional constructs have been defined.

import std.stdio, std.conv, std.typecons, std.algorithm, std.tuple,
std.variant, std.string, std.container, std.ascii;
import core.stdc.stdlib;
/// Lexical categories produced by LexicalScanner.
enum TokenKind
{
Keyword,        // reserved word from Keywords
Ident,          // user-defined identifier
Typeword,       // built-in type name from Typewords
Real,           // numeric literal containing exactly one '.'
HexInteger,     // '$'-prefixed hexadecimal literal
Integer,        // decimal integer literal
String,         // quoted string literal
Operator,       // entry of Operators
Punctuation,    // '.', ',' or ';'
Assign,         // ":=" (or a lone ":")
Delimiter,      // one of ( ) [ ]
DoubleNewline,  // blank line; terminates const/var/type sections in the parser
}
// Token payloads are stored as plain strings.
alias TokenValue = string;
// NOTE(review): SList.insert prepends, so producers must compensate to keep
// tokens in scan order — confirm wherever these lists are filled.
alias TokenList = SList!Token;
// NOTE(review): ASTNode is declared elsewhere in this file/project.
alias ASTNodeList = SList!ASTNode;
// Sentinel returned by the scanner's peek()/advance() once input is exhausted.
enum End_of_input = '\0';
/// A single lexical token: its category and raw text.
///
/// FIX: changed from `struct` to `final class`. SyntacticParser.peek(),
/// advance(), and match() return `null` and callers test tokens against
/// `null`, which only a reference type supports; as a struct none of that
/// code could compile. The `static opCall` keeps the scanner's value-style
/// construction syntax `Token(kind, value)` working unchanged.
final class Token
{
TokenKind kind;
TokenValue value;
this(TokenKind kind, TokenValue value)
{
this.kind = kind;
this.value = value;
}
/// Allows `Token(kind, value)` construction without an explicit `new`.
static Token opCall(TokenKind kind, TokenValue value)
{
return new Token(kind, value);
}
}
/// Reserved words recognised by scanName(). Words that double as operators
/// ("and", "or", "not", "mod", "div") are classified as Operator first,
/// because scanName() consults Operators before Keywords.
/// FIX: added "return" and "packed" — SyntacticParser matches both inside its
/// Keyword-kind branch (match("return"), match("packed")), but the scanner
/// previously emitted them as plain identifiers, making those statement and
/// type-qualifier paths unreachable.
enum Keywords = [
"and", "array", "begin", "case", "const", "div", "do", "downto", "else",
"end", "file", "for", "function", "goto", "if", "in", "out", "label", "mod",
"nil", "not", "of", "or", "procedure", "program", "record", "repeat",
"set", "string", "then", "to", "type", "until", "var", "while", "with",
"break", "continue", "using", "immutable", "pointer", "reference",
"return", "packed"
];
/// Built-in type names; scanName() checks these before Keywords, so "string"
/// (present in both lists) is classified as a Typeword.
enum Typewords = [
"integer", "real", "char", "boolean", "string", "shortint", "byte", "word",
"longint", "cardinal", "smallint", "longword", "int64", "qword",
"single", "double", "extended", "comp", "currency", "unsigned"
];
/// Operator lexemes. Symbol operators are produced by scanOperator(); word
/// operators ("and", "mod", ...) by scanName(), which checks this list first.
/// FIX: added "div", "shl" and "shr" — OperatorPrecedence and
/// OperatorAssociativity already define them, but the scanner could never
/// emit them as operators, so expressions using them failed to parse.
enum Operators = [
"+", "-", "*", "/", "mod", "div", "=", "<>", "<", ">", "<=", ">=",
"and", "or", "not", "shl", "shr", ":=", "++", "--"
];
/// Binding strength per infix operator — a higher number binds tighter.
/// NOTE(review): parseExpr() currently ignores this table (it parses at most
/// one infix operator, no precedence climbing); presumably a planned pass —
/// confirm before relying on it.
enum OperatorPrecedence = [
"=": 1, "<>": 1, "<": 1, "<=": 1, ">": 1, ">=": 1, "or": 2, "+": 3, "-": 3,
"and": 4, "*": 5, "/": 5, "div": 5, "mod": 5, "in": 6, "shl": 7, "shr": 7
];
/// Associativity per infix operator (all left-associative at present).
/// NOTE(review): like OperatorPrecedence, this table is not yet consulted by
/// parseExpr() — confirm intended use.
enum OperatorAssociativity = [
"=": "left", "<>": "left", "<": "left", "<=": "left", ">": "left",
">=": "left", "or": "left", "+": "left", "-": "left", "and": "left",
"*": "left", "/": "left", "div": "left", "mod": "left", "in": "left",
"shl": "left", "shr": "left"
];
/// Hand-written scanner that turns Pascal source text into a TokenList.
///
/// Fixes relative to the original:
///  - ScanError.toString used `parse!size_t` (string->number) to format a
///    number, and lacked the required `override`; now `to!string` + override.
///  - scanNumber returned the undefined symbol `scnaned_value`.
///  - scanString referenced an undefined `EscapeChars` table (now defined),
///    prefixed every string value with a spurious "\\", and looped forever on
///    an unterminated literal.
///  - scanOperator concatenated two `char`s (not valid D) and always consumed
///    two characters, so every single-character operator ("+", "<", ...) threw.
///  - lexicallyScan threw unconditionally after each token, had a dead '.'
///    branch containing `advanc()` and an argument-less `tokens.insert()`,
///    omitted '/' from the operator charset, and inserted into an SList
///    (which prepends) so the token stream came out reversed.
///  - line_num was never incremented; CRLF was mis-reported as a blank line;
///    "1..5" was scanned as one invalid number instead of 1 .. 5;
///    skipComment spun forever at end of input.
class LexicalScanner
{
string input;         // full source text being scanned
size_t pos;           // cursor into `input`
bool anchored = true; // true at start of a line; enables comment handling
size_t line_num;      // 1-based line counter for diagnostics
TokenList tokens;     // output, in scan order after lexicallyScan()

/// Escape sequences accepted after '\\' inside string literals.
enum char[char] EscapeChars = [
'n': '\n', 't': '\t', 'r': '\r', '0': '\0',
'\\': '\\', '\'': '\'', '"': '"'
];

class ScanError : Exception
{
this(string message)
{
super(message);
}
override string toString()
{
return "Lexical error at line: " ~ to!string(line_num) ~ "; reason given: " ~ msg;
}
}

this(string input)
{
this.input = input;
this.pos = 0;
this.line_num = 1;
}

/// Current character, or End_of_input when exhausted; does not consume.
char peek()
{
if (this.pos < this.input.length)
{
return this.input[this.pos];
}
return End_of_input;
}

/// One character of lookahead past the current position.
char peekNext()
{
if (this.pos + 1 < this.input.length)
{
return this.input[this.pos + 1];
}
return End_of_input;
}

/// Consume and return the current character (End_of_input when exhausted).
char advance()
{
if (this.pos < this.input.length)
{
return this.input[this.pos++];
}
return End_of_input;
}

void skipWhiteSpace()
{
while (" \t".canFind(peek()))
{
advance();
}
}

/// Consumes a run of newline characters, keeping line_num accurate.
void skipNewline()
{
while ("\r\n".canFind(peek()))
{
if (advance() == '\n')
{
line_num++;
}
}
}

/// Skips to the end of a comment.
/// NOTE(review): crude — stops at a newline, '}' or '*', then consumes one
/// more character; "(* ... *)" comments are only partially handled.
void skipComment()
{
while (peek() != End_of_input && !"\r\n}*".canFind(peek()))
{
advance();
}
advance();
}

/// At the start of a line, treats a leading '{' or '/' as opening a comment.
/// NOTE(review): comments are only recognised when line-anchored — kept as
/// originally designed; confirm this is intended.
void handleAnchored()
{
if (anchored)
{
if ("{/".canFind(peek()))
{
advance();
skipComment();
}
anchored = false;
}
}
/// Scans an identifier-shaped lexeme and classifies it, in priority order:
/// Operator word, Typeword, Keyword, otherwise Ident (case preserved).
Token scanName()
{
TokenValue lexeme = null;
while (isAlphaNum(peek()) || peek() == '_')
{
lexeme ~= advance();
}
if (Operators.canFind(lexeme.toLower()))
{
return Token(TokenKind.Operator, lexeme.toLower());
}
else if (Typewords.canFind(lexeme.toLower()))
{
return Token(TokenKind.Typeword, lexeme.toLower());
}
else if (Keywords.canFind(lexeme.toLower()))
{
return Token(TokenKind.Keyword, lexeme.toLower());
}
return Token(TokenKind.Ident, lexeme);
}

/// Scans a decimal integer, a real (exactly one '.'), or a '$'-prefixed
/// hexadecimal integer.
Token scanNumber()
{
TokenValue lexeme = null;
bool is_hex = false;
if (peek() == '$')
{
is_hex = true;
advance();
}
while (true)
{
if (isDigit(peek()) || (is_hex && isHexDigit(peek())))
{
lexeme ~= advance();
}
else if (peek() == '.' && isDigit(peekNext()))
{
// FIX: take '.' only when a digit follows, so "1..5" stays
// Integer 1, "..", Integer 5 instead of one malformed number.
lexeme ~= advance();
}
else
{
break;
}
}
if (lexeme is null)
{
throw new ScanError("Invalid numeric token"); // e.g. a lone '$'
}
if (lexeme.count('.') > 1)
{
throw new ScanError("Invalid numeric token");
}
else if (lexeme.count('.') == 1 && is_hex)
{
throw new ScanError("Invalid hexadecimal integer");
}
if (is_hex)
{
return Token(TokenKind.HexInteger, lexeme); // FIX: was undefined `scnaned_value`
}
else if (lexeme.count('.') == 1)
{
return Token(TokenKind.Real, lexeme);
}
else
{
return Token(TokenKind.Integer, lexeme);
}
}

/// Scans a quoted string literal; both quote styles are accepted but a
/// literal must end with the quote it started with.
Token scanString()
{
TokenValue lexeme = null;
auto quote = advance(); // opening quote
while (peek() != quote)
{
if (peek() == End_of_input)
{
throw new ScanError("Unterminated string literal"); // FIX: used to loop forever
}
if (peek() == '\\')
{
advance();
auto ch = advance();
if (ch in EscapeChars)
{
lexeme ~= EscapeChars[ch];
}
else
{
throw new ScanError("Wrong escaped character");
}
continue;
}
else if (peek() == (quote == '"' ? '\'' : '"'))
{
throw new ScanError("Wrong quote to terminate string");
}
lexeme ~= advance();
}
advance(); // closing quote
// FIX: the original prepended "\\" to every string value.
return Token(TokenKind.String, lexeme);
}

/// Called after the introducing ':' has been consumed by the caller.
Token scanAssign()
{
if (peek() == '=')
{
advance();
return Token(TokenKind.Assign, ":=");
}
else
{
return Token(TokenKind.Assign, ":");
}
}

/// Scans a symbol operator, preferring the two-character form.
/// FIX: the original always consumed two characters and concatenated two
/// chars (invalid D), so every one-character operator failed.
Token scanOperator()
{
string lexeme = "" ~ advance();
if (Operators.canFind(lexeme ~ peek()))
{
lexeme ~= advance();
}
if (Operators.canFind(lexeme))
{
return Token(TokenKind.Operator, lexeme);
}
throw new ScanError("Unknown Operator");
}

Token scanDelimiter()
{
return Token(TokenKind.Delimiter, to!string(advance()));
}

Token scanPunctuation()
{
TokenValue lexeme = null;
while (".,;".canFind(peek()))
{
lexeme ~= advance();
}
return Token(TokenKind.Punctuation, lexeme);
}

/// Main loop: tokenises the whole input into `tokens`, in scan order.
void lexicallyScan()
{
// FIX: SList.insert prepends; collect in an array and prepend in reverse
// at the end so the list front is the FIRST token scanned.
Token[] scanned;
while (peek() != End_of_input)
{
skipWhiteSpace();
handleAnchored();
if (peek() == End_of_input)
{
break; // trailing whitespace or comment
}
if (isAlpha(peek()) || peek() == '_')
{
scanned ~= scanName();
}
else if (peek() == '.')
{
// FIX: this branch was dead (numbers claimed '.') and contained
// `advanc()` plus an argument-less tokens.insert().
advance();
if (peek() == '.')
{
advance();
scanned ~= Token(TokenKind.Operator, "..");
}
else
{
scanned ~= Token(TokenKind.Punctuation, ".");
}
}
else if (isDigit(peek()) || peek() == '$')
{
scanned ~= scanNumber();
}
else if ("\"'".canFind(peek()))
{
scanned ~= scanString();
}
else if (peek() == ':')
{
advance();
scanned ~= scanAssign();
}
else if (peek() == '^')
{
// '^' is the pointer marker matched by value in parseComplexType.
advance();
scanned ~= Token(TokenKind.Delimiter, "^");
}
else if ("+-*/<>!@&|=".canFind(peek()))
{
// FIX: '/' was missing, so division could never be scanned.
scanned ~= scanOperator();
}
else if (";,".canFind(peek()))
{
scanned ~= scanPunctuation();
}
else if ("()[]".canFind(peek()))
{
scanned ~= scanDelimiter();
}
else if ("\r\n".canFind(peek()))
{
anchored = true;
// FIX: count actual '\n's so a single CRLF is one newline, not a
// blank line, and line_num stays accurate.
size_t newlines = 0;
while ("\r\n".canFind(peek()))
{
if (advance() == '\n')
{
newlines++;
line_num++;
}
}
if (newlines >= 2)
{
scanned ~= Token(TokenKind.DoubleNewline, null);
}
}
else
{
// FIX: the original threw here unconditionally, after EVERY token.
throw new ScanError("Unknown or misplaced character");
}
}
foreach_reverse (tok; scanned)
{
tokens.insert(tok);
}
}
}
/// Recursive-descent parser turning a TokenList into AST nodes.
/// NOTE(review): the AST node classes (NamesCollNode, TermNode, ...) are
/// declared elsewhere in the project — their setters are used as-is here.
///
/// Fixes relative to the original:
///  - ParseError.toString lacked `override` and said "ocurred".
///  - SList has no popFront(); advance() now uses removeFront().
///  - match() and all scanning loops guard against a null peek(), so
///    exhausted input raises a ParseError instead of dereferencing null or
///    spinning forever.
///  - Typos: advnace -> advance, pareExpr -> parseExpr, recrod_node -> record_node.
///  - parseNamesColl drained its dotted-name parts via name_node.popFront()
///    (no such member); it now drains `worklist` itself.
///  - `begin` blocks looped `while (match("end"))`, parsing statements only
///    AFTER consuming "end"; inverted to `while (!match("end"))`.
///  - parseTerm also registered a call's name as a plain variable.
///  - parseComplexType ate ')' / '=' and hung on unknown tokens; it now
///    stops there, and handles "file of" (previously a dead check).
///  - parseRecord never consumed the ';' terminating each field.
class SyntacticParser
{
TokenList tokens;  // remaining input; front = next token
ASTNodeList absyn; // NOTE(review): never written here — confirm intended use

class ParseError : Exception
{
this(string message)
{
super(message);
}
override string toString()
{
return "Parse error occurred; " ~ msg;
}
}

this(TokenList tokens)
{
this.tokens = tokens;
}

/// Next token without consuming it; null when input is exhausted.
Token peek()
{
if (!tokens.empty)
{
return tokens.front;
}
return null;
}

/// Consume and return the next token; null when input is exhausted.
Token advance()
{
if (!tokens.empty)
{
auto token = tokens.front;
tokens.removeFront(); // FIX: SList has no popFront()
return token;
}
return null;
}

/// Consume the next token iff its kind matches; otherwise null.
Token match(TokenKind kind)
{
auto token = peek();
if (token !is null && token.kind == kind)
{
return advance();
}
return null;
}

/// Consume the next token iff its text matches; otherwise null.
Token match(TokenValue value)
{
auto token = peek();
if (token !is null && token.value == value)
{
return advance();
}
return null;
}

Token expect(TokenKind kind)
{
auto token = match(kind);
if (!token)
{
throw new ParseError("Expected token of kind " ~ to!string(kind));
}
return token;
}

Token expect(TokenValue value)
{
auto token = match(value);
if (!token)
{
throw new ParseError("Expected token: " ~ value);
}
return token;
}

/// Parses a (possibly attributed, possibly dotted) list of names, stopping
/// at ':', ';', ')' or ']'.
ASTNode parseNamesColl()
{
auto namescoll_node = new NamesCollNode();
SList!NameNode worklist; // leading parts of a dotted name
while (peek() !is null
&& peek().value != ":"
&& peek().value != ";"
&& peek().value != ")"
&& peek().value != "]")
{
auto name_node = new NameNode();
if (peek().kind == TokenKind.Keyword || peek().kind == TokenKind.Ident)
{
if (match("var"))
{
name_node.setReference();
}
else if (match("val"))
{
name_node.setValue();
}
else if (match("const"))
{
name_node.setConst();
}
else if (match("immutable"))
{
name_node.setImmutable();
}
else if (match("in"))
{
name_node.setIn();
}
else if (match("out"))
{
name_node.setOut();
}
if (peek() !is null && peek().kind == TokenKind.Ident)
{
name_node.setNameValue(advance().value); // FIX: was advnace()
if (match("."))
{
// Part of a dotted name: stash it and keep reading.
name_node.setLong();
worklist.insert(name_node);
name_node.setVirgin();
}
else if (match(",") || true)
{
worklist.insert(name_node);
name_node.setVirgin();
// FIX: original popped from name_node (no such member).
// NOTE(review): SList is LIFO, so dotted parts arrive in
// reverse scan order — confirm addLongPart expects that.
while (!worklist.empty)
{
name_node.addLongPart(worklist.front);
worklist.removeFront();
}
namescoll_node.addName(name_node);
}
}
}
else
{
// FIX: an unexpected token used to spin this loop forever.
break;
}
}
return namescoll_node;
}

/// Parses a primary term: literal, variable, call, parenthesised
/// sub-expression, then optional index ([...]) and range pair (..).
ASTNode parseTerm()
{
if (peek() is null)
{
throw new ParseError("Unexpected end of input in term");
}
auto term_node = new TermNode();
if (peek().kind == TokenKind.Integer)
{
term_node.addInteger(advance().value);
}
else if (peek().kind == TokenKind.Real)
{
term_node.addReal(advance().value);
}
else if (peek().kind == TokenKind.String)
{
term_node.addString(advance().value);
}
else if (peek().kind == TokenKind.Ident)
{
auto name = advance().value;
if (match("("))
{
auto funcall_node = new FunCallNode();
funcall_node.addName(name);
while (peek() !is null && !match(")"))
{
funcall_node.addArgument(parseExpr());
match(",");
}
term_node.addFunCall(funcall_node);
}
else
{
// FIX: the original registered call names as variables too.
term_node.addVariable(name);
}
}
else if (match("("))
{
term_node.addSubexpr(parseExpr());
expect(")");
}
if (match("["))
{
term_node.addIndex(parseExprColl());
expect("]");
}
if (match(".."))
{
term_node.addRangePair(parseTerm());
}
return term_node;
}

/// Parses `[prefix-op] term [infix-op term]`.
/// NOTE(review): OperatorPrecedence/Associativity are not consulted; at
/// most one infix operator is parsed — confirm whether that is intended.
ASTNode parseExpr()
{
auto expr_node = new ExprNode();
if (peek() !is null && peek().kind == TokenKind.Operator)
{
expr_node.addPrefixOp(advance().value);
}
auto term_lhs = parseTerm();
if (peek() is null || peek().kind != TokenKind.Operator)
{
expr_node.addPrefixTerm(term_lhs);
return expr_node;
}
auto infix_operator = expect(TokenKind.Operator).value;
auto term_rhs = parseTerm();
expr_node.addInfix(term_lhs, infix_operator, term_rhs);
return expr_node;
}

/// Comma-separated expressions, stopping at ';', ')' or ']'.
ASTNode parseExprColl()
{
auto exprcoll_node = new ExprCollNode();
while (peek() !is null
&& peek().value != ";"
&& peek().value != ")"
&& peek().value != "]")
{
exprcoll_node.addExpr(parseExpr());
match(",");
}
return exprcoll_node;
}

/// Statement dispatcher: keyword statements, otherwise an assignment.
ASTNode parseStmt()
{
if (peek() is null)
{
throw new ParseError("Unexpected end of input");
}
if (peek().kind == TokenKind.Keyword)
{
if (match("program"))
{
auto prog_node = new ProgramStmtNode();
prog_node.addName(parseNamesColl());
if (match("("))
{
prog_node.addParams(parseNamesColl());
expect(")");
}
expect(";");
return prog_node;
}
else if (match("const"))
{
// Declarations run until a blank line (DoubleNewline).
auto conststmt_node = new ConstStmtNode();
while (peek() !is null && !match(TokenKind.DoubleNewline))
{
conststmt_node.addDecl(parseConstDecl());
}
return conststmt_node;
}
else if (match("var"))
{
auto varstmt_node = new VarStmtNode();
while (peek() !is null && !match(TokenKind.DoubleNewline))
{
varstmt_node.addDefn(parseVarDefn());
}
return varstmt_node;
}
else if (match("type"))
{
auto typestmt_node = new TypeStmtNode();
while (peek() !is null && !match(TokenKind.DoubleNewline))
{
typestmt_node.addDefn(parseComplexTypeDefn());
}
return typestmt_node;
}
else if (match("label"))
{
auto labelstmt_node = new LabelStmtNode();
labelstmt_node.addLabels(parseNamesColl());
expect(";");
return labelstmt_node;
}
else if (match("function"))
{
auto fundefn_node = new FunDefnNode();
fundefn_node.addName(parseNamesColl());
if (match("("))
{
while (peek() !is null && !match(")"))
{
fundefn_node.addParam(parseParam());
}
}
expect(":");
fundefn_node.addReturn(parseNamesColl());
expect(";");
if (match("var"))
{
while (peek() !is null && peek().value != "begin")
{
fundefn_node.addLocalVar(parseVarDefn());
}
}
if (peek() !is null && peek().value == "begin")
{
fundefn_node.addBody(parseStmt());
}
return fundefn_node;
}
else if (match("procedure"))
{
auto procdefn_node = new ProcDefnNode();
if (match("("))
{
while (peek() !is null && !match(")"))
{
procdefn_node.addParam(parseParam());
}
}
expect(";");
if (match("var"))
{
while (peek() !is null && peek().value != "begin")
{
procdefn_node.addLocalVar(parseVarDefn());
}
}
if (peek() !is null && peek().value == "begin")
{
procdefn_node.addBody(parseStmt());
}
return procdefn_node;
}
else if (match("begin"))
{
auto block_node = new BlockNode();
// FIX: original looped `while (match("end"))`, which consumed the
// terminator first and never parsed the block's statements.
while (peek() !is null && !match("end"))
{
block_node.addStmt(parseStmt());
}
if (match("."))
{
block_node.setTerminal(); // program-final "end."
}
else
{
expect(";");
}
return block_node;
}
else if (match("end"))
{
throw new ParseError("Dangling end");
}
else if (match("if"))
{
auto ifstmt_node = new IfStmtNode();
auto cond_expr = parseExpr();
if (match("in"))
{
cond_expr.addInBound(parseExpr()); // FIX: was pareExpr()
}
auto main_stmt = parseStmt();
ifstmt_node.addPair(cond_expr, main_stmt);
if (match("else"))
{
ifstmt_node.addElse(parseStmt());
}
return ifstmt_node;
}
else if (match("else"))
{
throw new ParseError("Dangling else");
}
else if (match("in"))
{
throw new ParseError("Dangling in");
}
else if (match("for"))
{
auto forstmt_node = new ForStmtNode();
auto cond_expr = parseExpr();
if (match("to"))
{
cond_expr.addToBound(parseExpr());
}
else if (match("downto"))
{
cond_expr.addDownToBound(parseExpr());
}
expect("do");
auto main_stmt = parseStmt();
forstmt_node.addPair(cond_expr, main_stmt);
return forstmt_node;
}
else if (match("to"))
{
throw new ParseError("Dangling to");
}
else if (match("downto"))
{
throw new ParseError("Dangling downto");
}
else if (match("repeat"))
{
auto repeatstmt_node = new RepeatStmtNode();
auto main_stmt = parseStmt();
expect("until");
auto cond_expr = parseExpr();
repeatstmt_node.addPair(cond_expr, main_stmt);
return repeatstmt_node;
}
else if (match("until"))
{
throw new ParseError("Dangling until");
}
else if (match("while"))
{
auto whilestmt_node = new WhileStmtNode();
auto cond_expr = parseExpr();
expect("do");
auto main_stmt = parseStmt();
whilestmt_node.addPair(cond_expr, main_stmt);
return whilestmt_node;
}
else if (match("do"))
{
throw new ParseError("Dangling do");
}
else if (match("return"))
{
auto returnstmt_node = new ReturnStmtNode();
returnstmt_node.addExpr(parseExpr());
expect(";");
return returnstmt_node;
}
else if (match("goto"))
{
auto gotostmt_node = new GotoStmtNode();
gotostmt_node.addName(parseNamesColl());
expect(";");
return gotostmt_node;
}
else if (match("break"))
{
return new BreakStmtNode();
}
else if (match("continue"))
{
return new ContinueStmtNode();
}
else if (match("case"))
{
auto casestmt_node = new CaseStmtNode();
casestmt_node.addDiscriminant(parseExpr());
expect("of");
while (peek() !is null && !match("else"))
{
auto clause_expr = parseExpr();
auto clause_body = parseStmt();
casestmt_node.addClause(clause_expr, clause_body);
}
casestmt_node.addDefaultClause(parseStmt());
expect("end");
expect(";");
return casestmt_node;
}
else if (match("using"))
{
auto usingstmt_node = new UsingStmtNode();
usingstmt_node.addNames(parseNamesColl());
expect(";");
return usingstmt_node;
}
else
{
throw new ParseError("Wrongful use of keyword " ~ peek().value);
}
}
else if (peek().kind == TokenKind.Ident)
{
// Anything starting with an identifier is an assignment statement.
auto assign_node = new AssignStmtNode();
assign_node.addLhs(parseNamesColl());
expect(":=");
while (peek() !is null && !match(";"))
{
assign_node.addRhs(parseExpr());
match(",");
}
return assign_node;
}
else
{
throw new ParseError("Misplaced token " ~ peek().value);
}
}

/// const NAME = EXPR ;
ASTNode parseConstDecl()
{
auto constdecl_node = new ConstDeclNode();
constdecl_node.addLhs(parseNamesColl());
match("="); // FIX: consume '=' so it is not misread as a prefix operator
constdecl_node.addRhs(parseExpr());
expect(";");
return constdecl_node;
}

/// var NAMES [: TYPE] [= EXPR] ;
ASTNode parseVarDefn()
{
auto vardefn_node = new VarDefnNode();
vardefn_node.addLhs(parseNamesColl());
if (match(":"))
{
vardefn_node.addType(parseComplexType());
}
if (match("="))
{
vardefn_node.addRhs(parseExpr());
}
expect(";");
return vardefn_node;
}

/// type NAMES = TYPE ;
ASTNode parseComplexTypeDefn()
{
auto typedefn_node = new TypeDefnNode();
typedefn_node.addLhs(parseNamesColl());
expect("=");
typedefn_node.addRhs(parseComplexType());
expect(";");
return typedefn_node;
}

/// One parameter declaration: NAMES : TYPE
ASTNode parseParam()
{
auto param_node = new ParamNode();
param_node.addLhs(parseNamesColl());
expect(":");
param_node.addRhs(parseComplexType());
return param_node;
}

/// Parses a type expression: builtin / named / record / array / enum,
/// with qualifiers (packed, const, immutable, pointer/reference to, '^').
ASTNode parseComplexType()
{
auto complextype_node = new ComplexTypeNode();
auto qual_node = new QualNode();
auto type_node = new TypeNode();
// FIX: also stop at ')' and '='; the original only stopped at ';' and
// spun forever inside parameter lists and initialised variables.
while (peek() !is null
&& peek().value != ";"
&& peek().value != ")"
&& peek().value != "=")
{
if (peek().kind == TokenKind.Typeword)
{
auto typeword = advance().value;
type_node.setBuiltin(typeword);
if (typeword == "file")
{
// NOTE(review): dead in practice — "file" scans as a Keyword,
// not a Typeword; kept for intent, real handling is below.
match("of");
}
}
else if (peek().kind == TokenKind.Ident)
{
type_node.setNames(parseNamesColl());
}
else if (peek().kind == TokenKind.Keyword)
{
if (match("record"))
{
type_node.setRecord(parseRecord());
complextype_node.addRecordNode(type_node);
return complextype_node;
}
else if (match("array"))
{
type_node.setArray();
match("of");
if (match("["))
{
type_node.setIndex(parseExprColl());
expect("]");
}
}
else if (match("packed"))
{
qual_node.setPacked();
}
else if (match("const"))
{
qual_node.setConst();
}
else if (match("immutable"))
{
qual_node.setImmutable();
}
else if (match("pointer"))
{
qual_node.setPointer();
expect("to");
}
else if (match("reference"))
{
qual_node.setReference();
expect("to");
}
else if (match("file"))
{
// FIX: "file of T" previously hung this loop.
type_node.setBuiltin("file");
match("of");
}
else
{
break; // FIX: unknown keyword used to hang this loop
}
}
else if (match("("))
{
type_node.setEnum(parseNamesColl());
expect(")");
continue;
}
else if (match("^"))
{
qual_node.setPointer();
}
else
{
break; // FIX: unhandled token used to hang this loop
}
if (!type_node.isVirgin())
{
complextype_node.addTypeNode(type_node);
}
if (!qual_node.isVirgin())
{
complextype_node.addQualNode(qual_node);
}
type_node.setVirgin();
qual_node.setVirgin();
}
return complextype_node;
}

/// record ... end; — fixed fields plus an optional "case" variant part.
ASTNode parseRecord()
{
auto record_node = new RecordNode();
while (true)
{
if (peek() is null)
{
throw new ParseError("Unterminated record"); // FIX: looped forever before
}
if (match("end"))
{
if (record_node.isVirgin())
{
throw new ParseError("Empty record");
}
expect(";");
return record_node;
}
else if (match("case"))
{
record_node.addVariantDiscrimNames(parseNamesColl());
if (match(":"))
{
record_node.addVariantDiscrimType(parseComplexType()); // FIX: was recrod_node
}
while (true)
{
if (peek() is null)
{
throw new ParseError("Unterminated record variant");
}
auto var_names = parseNamesColl();
expect(":");
auto var_type = parseComplexType();
match(";"); // FIX: consume the field terminator
record_node.addVariantField(var_names, var_type);
if (match("end"))
{
expect(";");
return record_node;
}
}
}
auto fixed_name = parseNamesColl();
expect(":");
auto fixed_type = parseComplexType();
match(";"); // FIX: consume the field terminator
record_node.addFixedField(fixed_name, fixed_type);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment