Skip to content

Instantly share code, notes, and snippets.

@zicklag
Last active January 1, 2019 23:56
Show Gist options
  • Save zicklag/c2a6060452759ce13864e43135e856f3 to your computer and use it in GitHub Desktop.
Save zicklag/c2a6060452759ce13864e43135e856f3 to your computer and use it in GitHub Desktop.
A WIP Ink Parser for Haxe.
import hxparse.Ruleset;
import hxparse.Position;
import hxparse.LexerTokenSource;
import hxparse.ParserBuilder;
import hxparse.RuleBuilder;
import hxparse.Lexer;
/**
Tokens produced while lexing Ink source.

Constructors without arguments stand for fixed pieces of syntax; constructors
with an argument carry the matched text or the parsed value.
**/
enum LexerToken {
// Brackets
TParenOpen;
TParenClose;
TSquareOpen;
TSquareClose;
TCurlyOpen;
TCurlyClose;
// Keywords
TVar;
TConst;
TAnd;
TOr;
TNot;
// Symbols
TColon;
TStar;
TPlus;
TDash;
TPipe;
TAmpersand;
TBang;
TTilde;
TDot;
TArrow;
// "===" (the constructor name is misspelled; it is referenced by that name elsewhere in this file)
TTrippleEqual;
TEqual;
// "<>"
TGlue;
/**Text of a `//` comment, without the leading slashes**/
TLineComment(s:String);
/**Text found between the opening and closing markers of a block comment**/
TBlockComment(s:String);
TNewline;
// Basic Containers
/**A name that can be used as an identifier**/
TIdentifier(s:String);
/**A single character of content (also used for the character following a backslash escape)**/
TChar(s:String);
/**A run of adjacent `TChar`/`TIdentifier` tokens merged together by `InkTokenSource`**/
TString(s:String);
/**A `true`/`false` literal read from a variable value**/
TBool(b:Bool);
/**A numeric literal read from a variable value**/
TNumber(n:Float);
// Printed as "->" by `LexerTokenPrinter`; no lexer rule visible in this file emits it
TDivert;
// Other
/**End of input**/
TEof;
}
/**
	Converts `LexerToken` values back into a textual form, mainly for debugging output.
**/
class LexerTokenPrinter {
	/**
		Returns the text that represents `token`.

		Fixed tokens map to their literal spelling, comment tokens are wrapped in
		their delimiters again, and value-carrying tokens are rendered from their payload.
	**/
	public static function print(token:LexerToken) {
		var rendered = switch (token) {
			// Tokens that simply carry text
			case TIdentifier(text) | TChar(text) | TString(text): text;
			// Tokens that carry a parsed value
			case TBool(flag): Std.string(flag);
			case TNumber(value): Std.string(value);
			// Comments get their delimiters back
			case TLineComment(body): "//" + body;
			case TBlockComment(body): "/*" + body + "*/";
			// Brackets
			case TParenOpen: "(";
			case TParenClose: ")";
			case TSquareOpen: "[";
			case TSquareClose: "]";
			case TCurlyOpen: "{";
			case TCurlyClose: "}";
			// Keywords
			case TVar: "VAR";
			case TConst: "CONST";
			case TAnd: "and";
			case TOr: "or";
			case TNot: "not";
			// Symbols
			case TColon: ":";
			case TStar: "*";
			case TPlus: "+";
			case TDash: "-";
			case TPipe: "|";
			case TAmpersand: "&";
			case TBang: "!";
			case TTilde: "~";
			case TDot: ".";
			case TArrow | TDivert: "->";
			case TTrippleEqual: "===";
			case TEqual: "=";
			case TGlue: "<>";
			case TNewline: "\n";
			// Other
			case TEof: "EOF";
		};
		return rendered;
	}
}
/**
	Lexer for Ink source text.

	The class defines a set of reusable rule helpers (one per token) and several
	rulesets that combine them. Which ruleset is active at a given moment is
	decided by `InkTokenSource`, based on the tokens it has already seen.
**/
class InkLexer extends Lexer implements RuleBuilder {
	/**Collects the text of the block comment that is currently being lexed**/
	static var buf = new StringBuf();

	/**Optional run of spaces/tabs; appended to rules that should swallow trailing blanks**/
	static final whitespace = "[ \t]*";

	// Token rules
	@:ruleHelper static final identifier = "[a-zA-Z_\\-][a-zA-Z0-9_\\-]*" => TIdentifier(lexer.current);
	// The match is a backslash followed by one character; only the escaped character is kept
	@:ruleHelper static final escapedCharacter = "\\\\." => TChar(lexer.current.charAt(1));
	// `*` (not `+`) so that an empty `//` comment is still recognised as a comment.
	// The two leading slashes are dropped from the token text.
	@:ruleHelper static final lineComment = "//[^\n]*" => TLineComment(lexer.current.substr(2));
	// The comment body is read with `blockCommentTokens`, which fills `buf`
	// until the closing marker is found.
	@:ruleHelper static final blockComment = "/\\*" => {
		buf = new StringBuf();
		lexer.token(blockCommentTokens);
		TBlockComment(buf.toString());
	};
	@:ruleHelper static final arrow = "->" + whitespace => TArrow;
	@:ruleHelper static final newline = "\n" => TNewline;
	// Fallback: any single character
	@:ruleHelper static final character = "." => TChar(lexer.current);
	@:ruleHelper static final eof = "" => TEof;
	@:ruleHelper static final varToken = "VAR" + whitespace => TVar;
	@:ruleHelper static final constToken = "CONST" + whitespace => TConst;
	@:ruleHelper static final and = "and" + whitespace => TAnd;
	@:ruleHelper static final or = "or" + whitespace => TOr;
	@:ruleHelper static final not = "not" + whitespace => TNot;
	@:ruleHelper static final star = "\\*" + whitespace => TStar;
	@:ruleHelper static final plus = "\\+" + whitespace => TPlus;
	@:ruleHelper static final dash = "-" + whitespace => TDash;
	@:ruleHelper static final tilde = "\\~" + whitespace => TTilde;
	@:ruleHelper static final pipe = "|" + whitespace => TPipe;
	@:ruleHelper static final ampersand = "&" + whitespace => TAmpersand;
	@:ruleHelper static final bang = "!" + whitespace => TBang;
	// Must produce TColon: `InkTokenSource` switches from the alternative
	// condition ruleset to the content ruleset when it sees this token.
	@:ruleHelper static final colon = ":" => TColon;
	@:ruleHelper static final dot = "\\." => TDot;
	@:ruleHelper static final trippleEqual = "===" + whitespace => TTrippleEqual;
	@:ruleHelper static final equal = "=" + whitespace => TEqual;
	@:ruleHelper static final glue = "<>" => TGlue;
	@:ruleHelper static final curlyOpen = "{" => TCurlyOpen;
	@:ruleHelper static final curlyClose = "}" => TCurlyClose;
	@:ruleHelper static final squareOpen = "[" => TSquareOpen;
	@:ruleHelper static final squareClose = "]" => TSquareClose;
	@:ruleHelper static final parenOpen = "\\(" => TParenOpen;
	@:ruleHelper static final parenClose = "\\)" => TParenClose;

	/**Tokens matched from top level story**/
	static public final storyTokens = @:rule [
		escapedCharacter,
		lineComment,
		blockComment,
		varToken,
		constToken,
		curlyOpen,
		curlyClose,
		star,
		plus,
		dash,
		tilde,
		arrow,
		glue,
		trippleEqual,
		equal,
		whitespace => lexer.token(storyTokens), // Skip whitespace
		newline,
		character,
		eof
		// Tokens yet to be placed
		// "\\|" => TPipe,
		// "\\&" => TAmpersand,
		// "\\!" => TBang,
	];

	/**Tokens matched inside of ink content**/
	static public final contentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of options**/
	static public final optionDefTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		parenOpen,
		parenClose,
		curlyOpen,
		curlyClose,
		squareOpen,
		squareClose,
		star,
		plus,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of option content**/
	static public final optionContentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		squareOpen,
		squareClose,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of gathers**/
	static public final gatherDefTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		parenOpen,
		parenClose,
		curlyOpen,
		curlyClose,
		arrow,
		dash,
		newline,
		character,
		eof
	];

	/**Tokens matched at the start of an alternative (`{ ... }`), before its content**/
	static public final alternativeConditionTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		ampersand,
		bang,
		tilde,
		colon,
		and,
		or,
		not,
		dot,
		identifier,
		curlyClose,
		newline,
		character,
		eof
	];

	/**Tokens matched inside the content of an alternative**/
	static public final alternativeContentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		pipe,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched while parsing variable definitions**/
	static public final varDefTokens = @:rule [
		identifier,
		whitespace + "=" + whitespace => TEqual,
		eof
	];

	/**Tokens matched while parsing variable value**/
	static public final varValTokens = @:rule [
		// Bool. The match may include leading whitespace, so trim before comparing.
		whitespace + "(true|false)" => TBool(StringTools.trim(lexer.current) == "true"),
		// Number. The decimal point is escaped so that only a literal `.` is accepted.
		whitespace + "-?(([1-9][0-9]*)|0)(\\.[0-9]+)?" => TNumber(Std.parseFloat(StringTools.trim(lexer.current))),
		// Other Content
		curlyOpen,
		curlyClose,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched where only an identifier is expected**/
	static public final identifierTokens = @:rule [
		identifier,
		eof
	];

	/**A reference to a point in the story**/
	static public final referenceTokens = @:rule [
		identifier,
		newline,
		dot,
		eof
	];

	/**
		Tokens matched inside a block comment. Every character is appended to
		`buf` and lexing continues with this same ruleset until the closing
		marker is reached.
	**/
	static public final blockCommentTokens = @:rule [
		"\\*/" => null, // End comment
		"*" => {buf.add("*"); lexer.token(blockCommentTokens);},
		"." => {buf.add(lexer.current); lexer.token(blockCommentTokens);}
	];
}
/**
	Token source that sits between `InkLexer` and the parser.

	After every token it decides which `InkLexer` ruleset should be used for the
	following token, and it merges runs of adjacent `TChar`/`TIdentifier` tokens
	into a single `TString`.
**/
class InkTokenSource {
	var lexer:InkLexer;
	var tokenSource:LexerTokenSource<LexerToken>;

	/**Used if we have read a token and postponed it**/
	var pendingToken:LexerToken = null;

	/**
		Depth of alternative parsing.

		Kept per instance so that separate token sources do not share (or
		inherit stale) nesting state.
	**/
	var alternativeDepth = 0;

	/**The ruleset the underlying token source uses for the next token it lexes**/
	public var ruleset(get, set):Ruleset<LexerToken>;

	function get_ruleset() {
		return this.tokenSource.ruleset;
	}

	function set_ruleset(value) {
		return this.tokenSource.ruleset = value;
	}

	/**
		If set, this ruleset is applied right after the next token has been
		read, and the field is then cleared again.
	**/
	public var nextRuleset:Ruleset<LexerToken> = null;

	/**
		Creates a token source that reads from `lexer`, starting with the
		top-level story ruleset.
	**/
	public function new(lexer:InkLexer) {
		this.lexer = lexer;
		this.tokenSource = new LexerTokenSource(lexer, InkLexer.storyTokens);
	}

	/**
		Returns the next token.

		A token postponed by a previous call is returned first. The active
		ruleset is updated according to the token, and a `TChar` is merged with
		the `TChar`/`TIdentifier` tokens that directly follow it into a `TString`.
	**/
	public function token():LexerToken {
		var token;
		// The next token is the pending token if it is set
		if (pendingToken != null) {
			token = pendingToken;
			pendingToken = null;
		} else {
			token = tokenSource.token();
		}

		// Apply `nextRuleset` if set
		if (nextRuleset != null) {
			ruleset = nextRuleset;
			nextRuleset = null;
		}

		// Change lexer ruleset based on parsed tokens (first matching case wins)
		switch ([ruleset, token]) {
			// Start parsing story tokens after newlines
			case [_, TNewline]:
				ruleset = InkLexer.storyTokens;

			// Parse variables and constants
			case [_, TVar | TConst]:
				ruleset = InkLexer.varDefTokens;
			case [InkLexer.varDefTokens, TEqual]:
				ruleset = InkLexer.varValTokens;

			// Parse alternatives
			case [_, TCurlyOpen]:
				alternativeDepth++;
				ruleset = InkLexer.alternativeConditionTokens;
			case [InkLexer.alternativeConditionTokens, TBang | TAmpersand | TTilde]:
				ruleset = InkLexer.alternativeContentTokens;
			case [InkLexer.alternativeConditionTokens, TColon]:
				ruleset = InkLexer.alternativeContentTokens;
			// An alternative may be closed before any content was seen (e.g. `{name}`),
			// so the closing brace is handled in both alternative rulesets.
			case [InkLexer.alternativeConditionTokens, TCurlyClose]:
				closeAlternative();
			case [InkLexer.alternativeContentTokens, TCurlyClose]:
				closeAlternative();

			// Start parsing content after tokens that indicate content
			case [InkLexer.storyTokens, TChar(_) | TGlue]:
				ruleset = InkLexer.contentTokens;

			// Start parsing options
			case [_, TStar | TPlus]:
				ruleset = InkLexer.optionDefTokens;
			case [InkLexer.optionDefTokens, TParenOpen]:
				ruleset = InkLexer.identifierTokens;
				nextRuleset = InkLexer.optionDefTokens;
			case [InkLexer.optionDefTokens, TSquareOpen | TChar(_)]:
				ruleset = InkLexer.optionContentTokens;

			// Start parsing gather
			case [_, TDash]:
				ruleset = InkLexer.gatherDefTokens;
			case [InkLexer.gatherDefTokens, TParenOpen]:
				ruleset = InkLexer.identifierTokens;
				nextRuleset = InkLexer.gatherDefTokens;
			case [InkLexer.gatherDefTokens, TCurlyOpen | TChar(_)]:
				ruleset = InkLexer.contentTokens;

			// Parse diverts, knots, and stitches
			case [_, TArrow]:
				ruleset = InkLexer.referenceTokens;
			case [_, TTrippleEqual]:
				ruleset = InkLexer.identifierTokens;
				nextRuleset = InkLexer.storyTokens;
			case [_, TEqual]:
				ruleset = InkLexer.identifierTokens;
				nextRuleset = InkLexer.storyTokens;

			default:
				{};
		}

		// Merge connected `TChar` tokens into `TString` tokens
		switch (token) {
			case TChar(val):
				var content = val;
				// Loop through connected tokens
				while (true) {
					var nextToken = tokenSource.token();
					switch (nextToken) {
						case TChar(nextVal) | TIdentifier(nextVal):
							content += nextVal;
						case _:
							// Not part of the run: keep it for the next call
							pendingToken = nextToken;
							return TString(content);
					}
				}
			case _:
				return token;
		}
	}

	/**Returns the current position reported by the underlying token source**/
	public function curPos():Position {
		return tokenSource.curPos();
	}

	/**
		Leaves one level of alternative nesting and selects the ruleset for what follows.

		A nested alternative can only have been opened from alternative content
		(the condition ruleset has no rule for `{`), so closing it returns to
		the content of the enclosing alternative. Closing the outermost
		alternative returns to plain content.
	**/
	function closeAlternative():Void {
		if (alternativeDepth > 0) {
			alternativeDepth--;
		}
		ruleset = (alternativeDepth == 0) ? InkLexer.contentTokens : InkLexer.alternativeContentTokens;
	}
}
/**
	Parser for Ink source, reading its tokens from an `InkTokenSource`.
**/
class InkParser extends hxparse.Parser<InkTokenSource, LexerToken> implements ParserBuilder {
	/**
		Creates a parser for `input`.

		@param input The source bytes to parse.
		@param sourceName Name of the source, passed on to the lexer.
	**/
	public function new(input:byte.ByteData, sourceName:String) {
		var lexer = new InkLexer(input, sourceName);
		var ts = new InkTokenSource(lexer);
		super(ts);
	}

	/**
		Debug helper: reads the whole token stream and traces it line by line.

		Diverts (`TArrow` followed by an identifier) are traced on their own;
		all other tokens are printed with `LexerTokenPrinter` and collected
		until a newline is reached. Reading stops at the end of input or at the
		first error, and any text collected so far is traced before returning.
	**/
	public function testParse():Void {
		var line = "";
		var finished = false;
		while (!finished) {
			try {
				switch stream {
					case [TArrow, TIdentifier(name)]: trace('Divert: {$name}');
					case [TEof]: finished = true;
					case [TNewline]: trace(line); line = "";
					case [token]: line += LexerTokenPrinter.print(token);
				}
			} catch (e:Dynamic) {
				trace(e);
				// Stop here: a failed read may not have consumed any input, in which
				// case trying again would raise the same error forever.
				finished = true;
			}
		}
		// Do not lose the last line when the input does not end with a newline
		if (line != "") {
			trace(line);
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment