Created
November 8, 2019 12:50
-
-
Save firejox/207c5be9aae7d6acc666b91d11bc110e to your computer and use it in GitHub Desktop.
LL(1) parser generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Marks a parser class. Its single positional argument is a mutable
# hash literal that accumulates grammar state (:counter, :top, :rules,
# terminal/non-terminal sets, FIRST/FOLLOW/predict tables) at
# macro-expansion time.
annotation ParserAttr
end
# Marks the class generated for the grammar's start (top) symbol.
annotation TopSymbolAttr
end
# Marks a class generated for a grammar symbol; carries a `parser:`
# back-reference to the enclosing parser class.
annotation SymbolAttr
end
# Marks a generated `self.ruleN` method; its named arguments are the
# production's right-hand side, in order.
annotation Rule
end
# Sentinel type standing for the empty production (epsilon).
# NOTE(review): "Eplison" is a typo for "Epsilon"; it is referenced
# consistently throughout the file, so renaming would touch every block.
struct Eplison
end
# Sentinel terminal for end of input (the conventional "$" marker).
struct EndMarker
end
# Minimal lexer interface required by the generated parser.
module LexerTrait
  # Returns the current token without consuming it.
  abstract def peek
  # Consumes the current token.
  abstract def shift
end
# Defines a parser class carrying the initial ParserAttr state hash:
#   :counter    — next rule-method index
#   :eplison    — the epsilon sentinel type (sic: typo for "epsilon")
#   :end_marker — the end-of-input sentinel type
# The block body is expanded inside the class (symbol declarations).
macro new_parser(type)
  @[ParserAttr({:counter => 0, :eplison => Eplison, :end_marker => EndMarker})]
  class {{ type }}
    {{ yield }}
  end
end
# Declares the grammar's start symbol inside a `new_parser` block.
# Records it under :top and defines the symbol class; raises at compile
# time if a top symbol was already declared or if the enclosing type is
# not a valid parser.
macro new_top_symbol(root)
  {% if ann = @type.annotation(ParserAttr) %}
    {% if parser = ann.args[0] %}
      {% if top = parser[:top] %}
        {{ raise "Top semantic has been defined" }}
      {% else %}
        {% parser[:top] = root %}
        @[TopSymbolAttr]
        @[SymbolAttr(parser: {{ @type }})]
        class {{ root }}
          {{yield}}
        end
      {% end %}
    {% else %}
      {{ raise "Parser #{@type} is with invalid attribute" }}
    {% end %}
  {% else %}
    {{ raise "Top semantic #{root} is not contained in valid parser" }}
  {% end %}
end
# Declares a non-top grammar symbol inside a `new_parser` block.
# Requires that the top symbol was declared first; the block body
# (typically `add_rule` calls) is expanded inside the symbol class.
macro new_symbol(target)
  {% if ann = @type.annotation(ParserAttr) %}
    {% if parser = ann.args[0] %}
      {% if top = parser[:top] %}
        @[SymbolAttr(parser: {{ @type }})]
        class {{target}}
          {{yield}}
        end
      {% else %}
        {{ raise "Top Semantic is undefined" }}
      {% end %}
    {% else %}
      {{ raise "Parser #{@type} is with invalid attribute" }}
    {% end %}
  {% else %}
    {{ raise "Semantic #{target} is not contained in valid parser" }}
  {% end %}
end
# Declares one production for the current symbol class. The named
# arguments form the right-hand side (values are symbol classes or
# terminal literals); the block body becomes the semantic action.
# Generates a @[Rule]-annotated class method `ruleN(<args>, lexer)`,
# using the parser-wide :counter to keep rule names unique.
#
# Fix: corrected the typo "declasre" -> "declare" in the reserved-name
# compile-time error message.
macro add_rule(**args)
  {% if ann = @type.annotation(SymbolAttr) %}
    {% if args.empty? %}
      {{ raise "Rule should not be empty" }}
    {% elsif args[:lexer] %}
      # `lexer` is reserved: it is appended as the last parameter below.
      {{ raise "`lexer` is a special argument. Don't declare with this name." }}
    {% else %}
      {%
        parser_type = ann[:parser]
        parser = parser_type.resolve.annotation(ParserAttr).args[0]
        counter = parser[:counter]
        parser[:counter] = counter + 1
        keys = args.keys
      %}
      @[Rule({{ args.double_splat }})]
      def self.rule{{ counter }}({{ keys.join(",").id }}, lexer)
        {{ yield }}
      end
    {% end %}
  {% else %}
    {{ raise "#{@type} is invalid semantic" }}
  {% end %}
end
# Walks the grammar graph starting from `current` (a parser class or a
# symbol class) and populates, on the parser's ParserAttr state hash:
#   :non_terminal — set (hash) of non-terminal symbol names
#   :terminal     — set of terminal values (char literals / Eplison)
#   :rules        — array of {lhs_path, rule_method_symbol, rhs_values}
# Recurses into each not-yet-seen non-terminal referenced on a RHS.
macro visit(current)
  {%
    type = current.resolve
  %}
  {% if ann = type.annotation(ParserAttr) %}
    {%
      parser = ann.args[0]
    %}
    {% if top = parser[:top] %}
      # Entry case: given the parser class, restart from its top symbol.
      visit({{ current }}::{{ top }})
    {% else %}
      {{ raise "Parser #{current} has no top semantic defined" }}
    {% end %}
  {% elsif ann = type.annotation(SymbolAttr) %}
    {%
      parser_type = ann[:parser]
      parser = parser_type.resolve.annotation(ParserAttr).args[0]
      rtype = current.names[1]
    %}
    # Register this symbol as a non-terminal.
    {% if non_terminal_set = parser[:non_terminal] %}
      {% if !non_terminal_set[rtype] %}
        {% non_terminal_set[rtype] = true %}
      {% end %}
    {% else %}
      {% parser[:non_terminal] = { rtype => true } %}
    {% end %}
    # Every @[Rule]-annotated class method contributes one production.
    {% for method in type.class.methods %}
      {% if ann = method.annotation(Rule) %}
        {% values = ann.named_args.values %}
        {% if arr = parser[:rules] %}
          {% arr << {current, method.name.symbolize, values} %}
        {% else %}
          {% parser[:rules] = [{current, method.name.symbolize, values}] %}
        {% end %}
        # Classify each RHS entry; recurse into unseen non-terminals.
        {% for sym in values %}
          {% if sym.is_a?(Path) %}
            {% if sym == parser[:eplison] %}
              # Epsilon is filed under :terminal for set bookkeeping.
              {% if terminal_set = parser[:terminal] %}
                {% terminal_set[sym] = true %}
              {% else %}
                {% parser[:terminal] = { sym => true} %}
              {% end %}
            {% elsif non_terminal_set = parser[:non_terminal] %}
              {% if !non_terminal_set[sym.id] %}
                {% non_terminal_set[sym.id] = true %}
                visit({{ parser_type }}::{{ sym }})
              {% end %}
            {% else %}
              {% parser[:non_terminal] = { sym.id => true } %}
            {% end %}
          {% elsif terminal_set = parser[:terminal] %}
            # Non-Path RHS entries (char literals) are terminals.
            {% terminal_set[sym] = true %}
          {% else %}
            {% parser[:terminal] = { sym => true } %}
          {% end %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# Seeds FIRST(target_sym) with symbols reachable through the FIRST
# position of productions of `current`. Only the leading RHS symbol of
# each production is inspected: a terminal (or epsilon) goes straight
# into FIRST(target); an unvisited non-terminal is recursed into.
# Productions whose leading non-terminal is nullable are completed
# later by the find_first2 propagation passes. :visited breaks cycles.
macro find_first1(parser_type, target_sym, current)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    current_type = current.resolve
    current_sym = current.names[1]
    visited_set = parser[:visited]
    visited_set[current_sym.id] = true
  %}
  {% for method in current_type.class.methods %}
    {% if ann = method.annotation(Rule) %}
      {%
        values = ann.named_args.values
        sym = values[0]
      %}
      {% if non_terminal_set[sym.id] && !visited_set[sym.id]%}
        find_first1({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
      {% elsif terminal_set[sym] %}
        # Leading terminal (or epsilon) belongs to FIRST(target).
        {% if first_set = parser[:first_set] %}
          {% if set = first_set[target_sym.id] %}
            {% set[sym] = true %}
          {% else %}
            {% first_set[target_sym.id] = { sym => true } %}
          {% end %}
        {% else %}
          {% parser[:first_set] = { target_sym.id => { sym => true } } %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# One FIRST-set propagation pass: for each production of `current`,
# copies FIRST(sym) of every leading non-terminal into FIRST(current),
# continuing past `sym` only while it is nullable (its FIRST contains
# epsilon). `state` flips to 1 once the nullable prefix ends.
# NOTE(review): a terminal that follows a nullable prefix only stops
# the scan — it is never added to FIRST(current). Also, epsilon is
# copied along with the rest of FIRST(sym) even when the remainder of
# the RHS is not nullable. Both look like over/under-approximations
# that happen not to matter for the example grammar — confirm.
macro find_first2(parser_type, current)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    eplison = parser[:eplison]
    first_set = parser[:first_set]
    current_type = current.resolve
    current_sym = current.names[1]
    current_set = first_set[current_sym.id]
  %}
  {% for method in current_type.class.methods %}
    {% if ann = method.annotation(Rule) %}
      {%
        rhs = ann.named_args.values
        state = 0
      %}
      {% for sym in rhs %}
        {% if state == 0 %}
          {% if non_terminal_set[sym.id]%}
            {% set = first_set[sym.id] %}
            {% for x in set.keys %}
              {% current_set[x] = true %}
            {% end %}
            {% unless set[eplison] %}
              {% state = 1 %}
            {% end %}
          {% else %}
            {% state = 1 %}
          {% end %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# Runs one full find_first2 propagation round over every non-terminal.
# Not invoked anywhere in this file — build_first_set inlines the same
# loop inside its fix-point iteration.
macro build_first_set2(parser_type)
  {% nts = parser_type.resolve.annotation(ParserAttr).args[0][:non_terminal] %}
  {% for nt in nts.keys %}
    find_first2({{ parser_type }}, {{ parser_type }}::{{ nt }})
  {% end %}
end
# Computes FIRST sets for every non-terminal: first a seeding pass
# (find_first1) per symbol, then |non_terminals| rounds of find_first2
# over all symbols — a crude but bounded fix-point iteration (any FIRST
# fact needs at most |non_terminals| propagation steps to settle).
macro build_first_set(parser_type)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
  %}
  {% for sym in non_terminal_set.keys %}
    clean_visited_set({{ parser_type }})
    find_first1({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
  {% end %}
  # The outer loop only bounds the number of propagation rounds; its
  # iteration variable is unused.
  {% for x in non_terminal_set %}
    {% for sym in non_terminal_set.keys %}
      find_first2({{ parser_type }}, {{ parser_type }}::{{ sym }})
    {% end %}
  {% end %}
end
# Collects the "pre-follow" of `target_sym` by scanning every rule for
# occurrences of `current`. For each symbol after an occurrence
# (state 1):
#   * a non-epsilon terminal lands directly in prefollow(target) and
#     ends the scan for this occurrence (state 2);
#   * a non-terminal contributes FIRST(sym) minus epsilon; if it is
#     nullable, its own pre-follow is folded in recursively and its
#     name is recorded so find_follow can expand it later.
# If the occurrence reaches the end of the RHS still in state 1, the
# rule's LHS pre-follow and name are added — the classic
# FOLLOW(lhs) subset-of FOLLOW(current) step. :visited breaks cycles.
# The resulting set therefore mixes terminals with non-terminal names.
# NOTE(review): a non-nullable non-terminal does not advance `state`
# to 2, so symbols past it are still folded in — looks like an
# over-approximation of FOLLOW; confirm whether this is intended.
macro find_prefollow(parser_type, target_sym, current)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    current_type = current.resolve
    current_sym = current.names[1]
    visited_set = parser[:visited]
    prefollow_set = parser[:prefollow_set]
    first_set = parser[:first_set]
  %}
  {% unless visited_set[current_sym.id] %}
    {% visited_set[current_sym.id] = true %}
    {% for rule in parser[:rules] %}
      {%
        lhs = rule[0]
        lhs_sym = lhs.names[1]
        rhs = rule[2]
        state = 0
      %}
      {% for sym in rhs %}
        {% if sym.id == current_sym.id %}
          {% state = 1 %}
        {% elsif state == 1 %}
          {% if terminal_set[sym] %}
            {% if sym != parser[:eplison] %}
              {% if set = prefollow_set[target_sym.id] %}
                {% set[sym] = true %}
              {% else %}
                {% prefollow_set[target_sym.id] = { sym => true } %}
              {% end %}
              {% state = 2 %}
            {% end %}
          {% else %}
            # Non-terminal after the occurrence: add FIRST(sym) \ {eps}.
            {% for k_sym in first_set[sym.id].keys %}
              {% if k_sym != parser[:eplison] %}
                {% if set = prefollow_set[target_sym.id] %}
                  {% set[k_sym] = true %}
                {% else %}
                  {% prefollow_set[target_sym.id] = { k_sym => true } %}
                {% end %}
              {% end %}
            {% end %}
            # Nullable non-terminal: also inherit what can follow it.
            {% if first_set[sym.id][parser[:eplison]] %}
              find_prefollow({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
              {% if set = prefollow_set[target_sym.id] %}
                {% set[sym.id] = true %}
              {% else %}
                {% prefollow_set[target_sym.id] = { sym.id => true } %}
              {% end %}
            {% end %}
          {% end %}
        {% end %}
      {% end %}
      # Occurrence at (or derivably at) the end of the RHS: inherit the
      # LHS's pre-follow and record its name for later expansion.
      {% if state == 1 %}
        find_prefollow({{ parser_type }}, {{ target_sym }}, {{ lhs }})
        {% if set = prefollow_set[target_sym.id] %}
          {% set[lhs_sym.id] = true %}
        {% else %}
          {% prefollow_set[target_sym.id] = { lhs_sym.id => true } %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# Expands prefollow(current) into terminal-only FOLLOW(target) entries:
# terminal entries are copied directly, while non-terminal names
# recurse into that symbol's own pre-follow set. :visited breaks
# recursion cycles.
macro find_follow(parser_type, target_sym, current)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    current_type = current.resolve
    current_sym = current.names[1]
    visited_set = parser[:visited]
    prefollow_set = parser[:prefollow_set]
    follow_set = parser[:follow_set]
    first_set = parser[:first_set]
  %}
  {% unless visited_set[current_sym.id] %}
    {% visited_set[current_sym.id] = true %}
    # NOTE(review): single-variable iteration over the pre-follow hash
    # — presumably yields its keys; confirm against the macro API.
    {% for sym in prefollow_set[current_sym.id] %}
      {% if non_terminal_set[sym.id] %}
        find_follow({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
      {% else %}
        {% if set = follow_set[target_sym.id] %}
          {% set[sym] = true %}
        {% else %}
          {% follow_set[target_sym.id] = { sym => true } %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# Resets :visited to a fresh (effectively empty) marker hash before a
# traversal. A `{ nil => nil }` literal is used as the empty table.
macro clean_visited_set(parser_type)
  {% attrs = parser_type.resolve.annotation(ParserAttr).args[0] %}
  {% attrs[:visited] = { nil => nil } %}
end
# Marks `sym` as seen in the parser's :visited hash.
macro mark_visited_set(parser_type, sym)
  {% attrs = parser_type.resolve.annotation(ParserAttr).args[0] %}
  {% attrs[:visited][sym.id] = true %}
end
# Clears the seen-marker for `sym` in the parser's :visited hash.
macro unmark_visited_set(parser_type, sym)
  {% attrs = parser_type.resolve.annotation(ParserAttr).args[0] %}
  {% attrs[:visited][sym.id] = false %}
end
# Builds the pre-follow relation for every non-terminal, starting from
# a fresh table in which the top symbol is seeded with the end marker
# ($). The :visited set is reset before each symbol's traversal.
macro build_prefollow_set(parser_type)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    end_marker = parser[:end_marker]
    top = parser[:top]
    parser[:prefollow_set] = { top.id => { end_marker => true }}
  %}
  {% for sym in non_terminal_set.keys %}
    clean_visited_set({{ parser_type }})
    find_prefollow({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
  {% end %}
end
# Discards the intermediate pre-follow data once FOLLOW sets are built.
macro cleanup_prefollow_set(parser_type)
  {% parser_type.resolve.annotation(ParserAttr).args[0][:prefollow_set].clear %}
end
# Computes FOLLOW sets: seeds FOLLOW(top) with the end marker, builds
# the pre-follow relation, resolves each non-terminal's pre-follow into
# concrete terminals via find_follow, then drops the intermediate data.
macro build_follow_set(parser_type)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    end_marker = parser[:end_marker]
    top = parser[:top]
    parser[:follow_set] = { top.id => { end_marker => true }}
  %}
  build_prefollow_set({{ parser_type }})
  {% for sym in non_terminal_set.keys %}
    clean_visited_set({{ parser_type }})
    find_follow({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
  {% end %}
  cleanup_prefollow_set({{ parser_type }})
end
# Builds the LL(1) predict table: predict[lhs][terminal] = rule index.
# For each rule, FIRST of the RHS prefix (epsilon excluded) selects the
# rule; if the whole RHS can derive epsilon (`state` stays 0), every
# terminal in FOLLOW(lhs) selects it as well. Existing entries are
# never overwritten, so the first rule registered wins on a conflict —
# no LL(1)-conflict diagnostics are emitted.
macro build_predict_set(parser_type)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    eplison = parser[:eplison]
    first_set = parser[:first_set]
    follow_set = parser[:follow_set]
  %}
  {% for rule, idx in parser[:rules] %}
    {%
      lhs = rule[0]
      lhs_sym = lhs.names[1]
      rhs = rule[2]
      state = 0
    %}
    {% for sym in rhs %}
      {% if state == 0 %}
        {% if terminal_set[sym] %}
          {% if sym != eplison %}
            # Leading terminal: it alone predicts this rule.
            {% if predict_sets = parser[:predict_set] %}
              {% if set = predict_sets[lhs_sym.id] %}
                {% unless set[sym] %}
                  {% set[sym] = idx %}
                {% end %}
              {% else %}
                {% predict_sets[lhs_sym.id] = { sym => idx } %}
              {% end %}
            {% else %}
              {% parser[:predict_set] = { lhs_sym.id => { sym => idx } } %}
            {% end %}
            {% state = 1 %}
          {% end %}
        {% else %}
          # Non-terminal: FIRST(sym) minus epsilon predicts this rule;
          # keep scanning the prefix only while sym is nullable.
          {% for t_sym in first_set[sym.id].keys %}
            {% if t_sym != eplison %}
              {% if predict_sets = parser[:predict_set] %}
                {% if set = predict_sets[lhs_sym.id] %}
                  {% unless set[t_sym] %}
                    {% set[t_sym] = idx %}
                  {% end %}
                {% else %}
                  {% predict_sets[lhs_sym.id] = { t_sym => idx } %}
                {% end %}
              {% else %}
                {% parser[:predict_set] = { lhs_sym.id => { t_sym => idx } } %}
              {% end %}
            {% end %}
          {% end %}
          {% unless first_set[sym.id][eplison] %}
            {% state = 1 %}
          {% end %}
        {% end %}
      {% end %}
    {% end %}
    # Entire RHS nullable (or RHS is epsilon): FOLLOW(lhs) predicts.
    {% if state == 0 %}
      {% for sym in follow_set[lhs_sym.id].keys %}
        {% if predict_sets = parser[:predict_set] %}
          {% if set = predict_sets[lhs_sym.id] %}
            {% unless set[sym] %}
              {% set[sym] = idx %}
            {% end %}
          {% else %}
            {% predict_sets[lhs_sym.id] = { sym => idx } %}
          {% end %}
        {% else %}
          {% parser[:predict_set] = { lhs_sym.id => { sym => idx } } %}
        {% end %}
      {% end %}
    {% end %}
  {% end %}
end
# Emits `visitor_type`: a recursive-descent parser struct with one
# `self.visit` overload per non-terminal. Each overload switches on
# `lexer.peek` using the predict table, then calls the chosen rule's
# action method with the parsed RHS values — sub-parse results for
# non-terminals, consumed tokens for terminals — plus the lexer.
# Each generated branch is preceded by a "# LHS -> rhs..." comment in
# the expanded code; `{% debug() %}` dumps the generated source at
# compile time.
macro build_topdown_parsing_visitor(parser_type, visitor_type)
  {%
    parser = parser_type.resolve.annotation(ParserAttr).args[0]
    non_terminal_set = parser[:non_terminal]
    terminal_set = parser[:terminal]
    eplison = parser[:eplison]
    predict_set = parser[:predict_set]
    top = parser[:top]
    rules = parser[:rules]
  %}
  struct {{ visitor_type }}
    # Parse the whole input starting from the grammar's top symbol.
    def self.parse(lexer : LexerTrait)
      visit({{ parser_type }}::{{ top }}, lexer)
    end
    {% for nt_sym in non_terminal_set %}
      def self.visit(type : {{ parser_type }}::{{ nt_sym }}.class, lexer : LexerTrait)
        {% begin %}
          case lexer.peek
          {% for sym, rule_idx in predict_set[nt_sym.id] %}
            {%
              rule = rules[rule_idx]
              lhs = rule[0]
              name = rule[1]
              rhs = rule[2]
              production = "#{nt_sym} -> " + rhs.join(' ')
            %}
            when {{ sym }}
              {% if rhs[0] != eplison %}
                {{ ("# " + production).id }}
                {{ lhs }}.{{ name.id }}(
                  {% for rhs_sym in rhs %}
                    {% if non_terminal_set[rhs_sym.id] %}
                      visit({{ parser_type }}::{{ rhs_sym }}, lexer),
                    {% else %}
                      # Terminal: accept the expected token or raise.
                      lexer.peek.tap { |t|
                        if t == {{ rhs_sym }}
                          lexer.shift
                        else
                          raise "Invalid token '#{t}'" + {{ " in production : " + production + " ,expect token :" + rhs_sym.stringify }}
                        end
                      } ,
                    {% end %}
                  {% end %}
                  lexer)
              {% else %}
                {{ ("# " + production).id }}
                {{ lhs }}.{{ name.id }}( {{ eplison }}.new , lexer)
              {% end %}
          {% end %}
          else
            raise "Invalid token '#{lexer.peek}' when visiting #{ type }"
          end
        {% end %}
      end
    {% end %}
  end
  {% debug() %}
end
# Debug helper: prints the full ParserAttr state hash of a parser class
# at macro-expansion time.
macro pp_parser(type)
  {% attrs = type.resolve.annotation(ParserAttr).args[0] %}
  {% pp attrs %}
end
# Debug helper: prints one entry (`sym`) of a parser class's ParserAttr
# state hash at macro-expansion time.
macro pp_parser_get(type, sym)
  {% pp type.resolve.annotation(ParserAttr).args[0][sym] %}
end
# Example grammar — the classic LL(1) arithmetic-expression grammar.
# The token "id" is lexed as the two characters 'i' then 'd':
#   E  -> T E1
#   E1 -> '+' T E1 | epsilon
#   T  -> F T1
#   T1 -> '*' F T1 | epsilon
#   F  -> '(' E ')' | 'i' 'd'
new_parser(MyParser) do
  new_top_symbol(E) do
    # E -> T E1
    add_rule(t: T, e1: E1) do
    end
  end
  new_symbol(E1) do
    # E1 -> '+' T E1
    add_rule(plus: '+', t: T, e1: E1) do
    end
    # E1 -> epsilon
    add_rule(e: Eplison) do
    end
  end
  new_symbol(T) do
    # T -> F T1
    add_rule(f: F, t1: T1) do
    end
  end
  new_symbol(T1) do
    # T1 -> '*' F T1
    add_rule(star: '*', f: F, t1: T1) do
    end
    # T1 -> epsilon
    add_rule(e: Eplison) do
    end
  end
  new_symbol(F) do
    # F -> '(' E ')'
    add_rule(lpar: '(', e: E, rpar: ')') do
    end
    # F -> 'i' 'd'
    add_rule(a: 'i', b: 'd') do
    end
  end
end
# Drive the generator: collect symbols and rules, compute FIRST,
# FOLLOW and predict sets, then emit the MyVisitor recursive-descent
# parser. Order matters — each step reads state the previous one wrote.
visit(MyParser)
build_first_set(MyParser)
build_follow_set(MyParser)
build_predict_set(MyParser)
build_topdown_parsing_visitor(MyParser, MyVisitor)
# Minimal character-level lexer over a String, backed by Char::Reader.
# Each character is one token; end of input is reported as EndMarker.
class MyLexer
  include LexerTrait

  @reader : Char::Reader

  def initialize(str : String)
    @reader = Char::Reader.new(str)
  end

  # Current character, or EndMarker once the reader sits on '\0'.
  def peek
    ch = @reader.current_char
    return EndMarker.new if ch == '\0'
    ch
  end

  # Consume one character; no-op at end of input.
  def shift
    @reader.next_char if @reader.has_next?
  end
end
MyVisitor.parse(MyLexer.new("id+id")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment