Skip to content

Instantly share code, notes, and snippets.

@firejox
Created November 8, 2019 12:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save firejox/207c5be9aae7d6acc666b91d11bc110e to your computer and use it in GitHub Desktop.
Save firejox/207c5be9aae7d6acc666b91d11bc110e to your computer and use it in GitHub Desktop.
LL(1) parser generator
annotation ParserAttr
end
annotation TopSymbolAttr
end
annotation SymbolAttr
end
annotation Rule
end
struct Eplison
end
struct EndMarker
end
module LexerTrait
abstract def peek
abstract def shift
end
macro new_parser(type)
@[ParserAttr({:counter => 0, :eplison => Eplison, :end_marker => EndMarker})]
class {{ type }}
{{ yield }}
end
end
macro new_top_symbol(root)
{% if ann = @type.annotation(ParserAttr) %}
{% if parser = ann.args[0] %}
{% if top = parser[:top] %}
{{ raise "Top semantic has been defined" }}
{% else %}
{% parser[:top] = root %}
@[TopSymbolAttr]
@[SymbolAttr(parser: {{ @type }})]
class {{ root }}
{{yield}}
end
{% end %}
{% else %}
{{ raise "Parser #{@type} is with invalid attribute" }}
{% end %}
{% else %}
{{ raise "Top semantic #{root} is not contained in valid parser" }}
{% end %}
end
macro new_symbol(target)
{% if ann = @type.annotation(ParserAttr) %}
{% if parser = ann.args[0] %}
{% if top = parser[:top] %}
@[SymbolAttr(parser: {{ @type }})]
class {{target}}
{{yield}}
end
{% else %}
{{ raise "Top Semantic is undefined" }}
{% end %}
{% else %}
{{ raise "Parser #{@type} is with invalid attribute" }}
{% end %}
{% else %}
{{ raise "Semantic #{target} is not contained in valid parser" }}
{% end %}
end
macro add_rule(**args)
{% if ann = @type.annotation(SymbolAttr) %}
{% if args.empty? %}
{{ raise "Rule should not be empty" }}
{% elsif args[:lexer] %}
{{ raise "`lexer` is a special argument. Don't declasre with this name." }}
{% else %}
{%
parser_type = ann[:parser]
parser = parser_type.resolve.annotation(ParserAttr).args[0]
counter = parser[:counter]
parser[:counter] = counter + 1
keys = args.keys
%}
@[Rule({{ args.double_splat }})]
def self.rule{{ counter }}({{ keys.join(",").id }}, lexer)
{{ yield }}
end
{% end %}
{% else %}
{{ raise "#{@type} is invalid semantic" }}
{% end %}
end
macro visit(current)
{%
type = current.resolve
%}
{% if ann = type.annotation(ParserAttr) %}
{%
parser = ann.args[0]
%}
{% if top = parser[:top] %}
visit({{ current }}::{{ top }})
{% else %}
{{ raise "Parser #{current} has no top semantic defined" }}
{% end %}
{% elsif ann = type.annotation(SymbolAttr) %}
{%
parser_type = ann[:parser]
parser = parser_type.resolve.annotation(ParserAttr).args[0]
rtype = current.names[1]
%}
{% if non_terminal_set = parser[:non_terminal] %}
{% if !non_terminal_set[rtype] %}
{% non_terminal_set[rtype] = true %}
{% end %}
{% else %}
{% parser[:non_terminal] = { rtype => true } %}
{% end %}
{% for method in type.class.methods %}
{% if ann = method.annotation(Rule) %}
{% values = ann.named_args.values %}
{% if arr = parser[:rules] %}
{% arr << {current, method.name.symbolize, values} %}
{% else %}
{% parser[:rules] = [{current, method.name.symbolize, values}] %}
{% end %}
{% for sym in values %}
{% if sym.is_a?(Path) %}
{% if sym == parser[:eplison] %}
{% if terminal_set = parser[:terminal] %}
{% terminal_set[sym] = true %}
{% else %}
{% parser[:terminal] = { sym => true} %}
{% end %}
{% elsif non_terminal_set = parser[:non_terminal] %}
{% if !non_terminal_set[sym.id] %}
{% non_terminal_set[sym.id] = true %}
visit({{ parser_type }}::{{ sym }})
{% end %}
{% else %}
{% parser[:non_terminal] = { sym.id => true } %}
{% end %}
{% elsif terminal_set = parser[:terminal] %}
{% terminal_set[sym] = true %}
{% else %}
{% parser[:terminal] = { sym => true } %}
{% end %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro find_first1(parser_type, target_sym, current)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
current_type = current.resolve
current_sym = current.names[1]
visited_set = parser[:visited]
visited_set[current_sym.id] = true
%}
{% for method in current_type.class.methods %}
{% if ann = method.annotation(Rule) %}
{%
values = ann.named_args.values
sym = values[0]
%}
{% if non_terminal_set[sym.id] && !visited_set[sym.id]%}
find_first1({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
{% elsif terminal_set[sym] %}
{% if first_set = parser[:first_set] %}
{% if set = first_set[target_sym.id] %}
{% set[sym] = true %}
{% else %}
{% first_set[target_sym.id] = { sym => true } %}
{% end %}
{% else %}
{% parser[:first_set] = { target_sym.id => { sym => true } } %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro find_first2(parser_type, current)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
eplison = parser[:eplison]
first_set = parser[:first_set]
current_type = current.resolve
current_sym = current.names[1]
current_set = first_set[current_sym.id]
%}
{% for method in current_type.class.methods %}
{% if ann = method.annotation(Rule) %}
{%
rhs = ann.named_args.values
state = 0
%}
{% for sym in rhs %}
{% if state == 0 %}
{% if non_terminal_set[sym.id]%}
{% set = first_set[sym.id] %}
{% for x in set.keys %}
{% current_set[x] = true %}
{% end %}
{% unless set[eplison] %}
{% state = 1 %}
{% end %}
{% else %}
{% state = 1 %}
{% end %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro build_first_set2(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
%}
{% for sym in non_terminal_set.keys %}
find_first2({{ parser_type }}, {{ parser_type }}::{{ sym }})
{% end %}
end
macro build_first_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
%}
{% for sym in non_terminal_set.keys %}
clean_visited_set({{ parser_type }})
find_first1({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
{% end %}
{% for x in non_terminal_set %}
{% for sym in non_terminal_set.keys %}
find_first2({{ parser_type }}, {{ parser_type }}::{{ sym }})
{% end %}
{% end %}
end
macro find_prefollow(parser_type, target_sym, current)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
current_type = current.resolve
current_sym = current.names[1]
visited_set = parser[:visited]
prefollow_set = parser[:prefollow_set]
first_set = parser[:first_set]
%}
{% unless visited_set[current_sym.id] %}
{% visited_set[current_sym.id] = true %}
{% for rule in parser[:rules] %}
{%
lhs = rule[0]
lhs_sym = lhs.names[1]
rhs = rule[2]
state = 0
%}
{% for sym in rhs %}
{% if sym.id == current_sym.id %}
{% state = 1 %}
{% elsif state == 1 %}
{% if terminal_set[sym] %}
{% if sym != parser[:eplison] %}
{% if set = prefollow_set[target_sym.id] %}
{% set[sym] = true %}
{% else %}
{% prefollow_set[target_sym.id] = { sym => true } %}
{% end %}
{% state = 2 %}
{% end %}
{% else %}
{% for k_sym in first_set[sym.id].keys %}
{% if k_sym != parser[:eplison] %}
{% if set = prefollow_set[target_sym.id] %}
{% set[k_sym] = true %}
{% else %}
{% prefollow_set[target_sym.id] = { k_sym => true } %}
{% end %}
{% end %}
{% end %}
{% if first_set[sym.id][parser[:eplison]] %}
find_prefollow({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
{% if set = prefollow_set[target_sym.id] %}
{% set[sym.id] = true %}
{% else %}
{% prefollow_set[target_sym.id] = { sym.id => true } %}
{% end %}
{% end %}
{% end %}
{% end %}
{% end %}
{% if state == 1 %}
find_prefollow({{ parser_type }}, {{ target_sym }}, {{ lhs }})
{% if set = prefollow_set[target_sym.id] %}
{% set[lhs_sym.id] = true %}
{% else %}
{% prefollow_set[target_sym.id] = { lhs_sym.id => true } %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro find_follow(parser_type, target_sym, current)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
current_type = current.resolve
current_sym = current.names[1]
visited_set = parser[:visited]
prefollow_set = parser[:prefollow_set]
follow_set = parser[:follow_set]
first_set = parser[:first_set]
%}
{% unless visited_set[current_sym.id] %}
{% visited_set[current_sym.id] = true %}
{% for sym in prefollow_set[current_sym.id] %}
{% if non_terminal_set[sym.id] %}
find_follow({{ parser_type }}, {{ target_sym }}, {{ parser_type }}::{{ sym }})
{% else %}
{% if set = follow_set[target_sym.id] %}
{% set[sym] = true %}
{% else %}
{% follow_set[target_sym.id] = { sym => true } %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro clean_visited_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
parser[:visited] = { nil => nil }
%}
end
macro mark_visited_set(parser_type, sym)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
parser[:visited][sym.id] = true
%}
end
macro unmark_visited_set(parser_type, sym)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
parser[:visited][sym.id] = false
%}
end
macro build_prefollow_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
end_marker = parser[:end_marker]
top = parser[:top]
parser[:prefollow_set] = { top.id => { end_marker => true }}
%}
{% for sym in non_terminal_set.keys %}
clean_visited_set({{ parser_type }})
find_prefollow({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
{% end %}
end
macro cleanup_prefollow_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
parser[:prefollow_set].clear
%}
end
macro build_follow_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
end_marker = parser[:end_marker]
top = parser[:top]
parser[:follow_set] = { top.id => { end_marker => true }}
%}
build_prefollow_set({{ parser_type }})
{% for sym in non_terminal_set.keys %}
clean_visited_set({{ parser_type }})
find_follow({{ parser_type }}, {{ sym }}, {{ parser_type }}::{{ sym }})
{% end %}
cleanup_prefollow_set({{ parser_type }})
end
macro build_predict_set(parser_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
eplison = parser[:eplison]
first_set = parser[:first_set]
follow_set = parser[:follow_set]
%}
{% for rule, idx in parser[:rules] %}
{%
lhs = rule[0]
lhs_sym = lhs.names[1]
rhs = rule[2]
state = 0
%}
{% for sym in rhs %}
{% if state == 0 %}
{% if terminal_set[sym] %}
{% if sym != eplison %}
{% if predict_sets = parser[:predict_set] %}
{% if set = predict_sets[lhs_sym.id] %}
{% unless set[sym] %}
{% set[sym] = idx %}
{% end %}
{% else %}
{% predict_sets[lhs_sym.id] = { sym => idx } %}
{% end %}
{% else %}
{% parser[:predict_set] = { lhs_sym.id => { sym => idx } } %}
{% end %}
{% state = 1 %}
{% end %}
{% else %}
{% for t_sym in first_set[sym.id].keys %}
{% if t_sym != eplison %}
{% if predict_sets = parser[:predict_set] %}
{% if set = predict_sets[lhs_sym.id] %}
{% unless set[t_sym] %}
{% set[t_sym] = idx %}
{% end %}
{% else %}
{% predict_sets[lhs_sym.id] = { t_sym => idx } %}
{% end %}
{% else %}
{% parser[:predict_set] = { lhs_sym.id => { t_sym => idx } } %}
{% end %}
{% end %}
{% end %}
{% unless first_set[sym.id][eplison] %}
{% state = 1 %}
{% end %}
{% end %}
{% end %}
{% end %}
{% if state == 0 %}
{% for sym in follow_set[lhs_sym.id].keys %}
{% if predict_sets = parser[:predict_set] %}
{% if set = predict_sets[lhs_sym.id] %}
{% unless set[sym] %}
{% set[sym] = idx %}
{% end %}
{% else %}
{% predict_sets[lhs_sym.id] = { sym => idx } %}
{% end %}
{% else %}
{% parser[:predict_set] = { lhs_sym.id => { sym => idx } } %}
{% end %}
{% end %}
{% end %}
{% end %}
end
macro build_topdown_parsing_visitor(parser_type, visitor_type)
{%
parser = parser_type.resolve.annotation(ParserAttr).args[0]
non_terminal_set = parser[:non_terminal]
terminal_set = parser[:terminal]
eplison = parser[:eplison]
predict_set = parser[:predict_set]
top = parser[:top]
rules = parser[:rules]
%}
struct {{ visitor_type }}
def self.parse(lexer : LexerTrait)
visit({{ parser_type }}::{{ top }}, lexer)
end
{% for nt_sym in non_terminal_set %}
def self.visit(type : {{ parser_type }}::{{ nt_sym }}.class, lexer : LexerTrait)
{% begin %}
case lexer.peek
{% for sym, rule_idx in predict_set[nt_sym.id] %}
{%
rule = rules[rule_idx]
lhs = rule[0]
name = rule[1]
rhs = rule[2]
production = "#{nt_sym} -> " + rhs.join(' ')
%}
when {{ sym }}
{% if rhs[0] != eplison %}
{{ ("# " + production).id }}
{{ lhs }}.{{ name.id }}(
{% for rhs_sym in rhs %}
{% if non_terminal_set[rhs_sym.id] %}
visit({{ parser_type }}::{{ rhs_sym }}, lexer),
{% else %}
lexer.peek.tap { |t|
if t == {{ rhs_sym }}
lexer.shift
else
raise "Invalid token '#{t}'" + {{ " in production : " + production + " ,expect token :" + rhs_sym.stringify }}
end
} ,
{% end %}
{% end %}
lexer)
{% else %}
{{ ("# " + production).id }}
{{ lhs }}.{{ name.id }}( {{ eplison }}.new , lexer)
{% end %}
{% end %}
else
raise "Invalid token '#{lexer.peek}' when visiting #{ type }"
end
{% end %}
end
{% end %}
end
{% debug() %}
end
macro pp_parser(type)
{% pp type.resolve.annotation(ParserAttr).args[0] %}
end
macro pp_parser_get(type, sym)
{%
parser = type.resolve.annotation(ParserAttr).args[0]
pp parser[sym]
%}
end
new_parser(MyParser) do
new_top_symbol(E) do
add_rule(t: T, e1: E1) do
end
end
new_symbol(E1) do
add_rule(plus: '+', t: T, e1: E1) do
end
add_rule(e: Eplison) do
end
end
new_symbol(T) do
add_rule(f: F, t1: T1) do
end
end
new_symbol(T1) do
add_rule(star: '*', f: F, t1: T1) do
end
add_rule(e: Eplison) do
end
end
new_symbol(F) do
add_rule(lpar: '(', e: E, rpar: ')') do
end
add_rule(a: 'i', b: 'd') do
end
end
end
visit(MyParser)
build_first_set(MyParser)
build_follow_set(MyParser)
build_predict_set(MyParser)
build_topdown_parsing_visitor(MyParser, MyVisitor)
class MyLexer
include LexerTrait
@reader : Char::Reader
def initialize(str : String)
@reader = Char::Reader.new str
end
def peek
if (ch = @reader.current_char) != '\0'
ch
else
EndMarker.new
end
end
def shift
if @reader.has_next?
@reader.next_char
end
end
end
MyVisitor.parse(MyLexer.new("id+id"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment