# Source: GitHub gist fabioyamate/242115 by @fabioyamate, created November 24, 2009 19:04.
require 'grammar_wirth'
require 'pp'
# Regular expressions used to pick apart the grammar text.

# An identifier: one ASCII letter followed by any number of ASCII letters or
# underscores. Not referenced by the code visible in this file.
NONTERMINAL = /[a-zA-Z][a-zA-Z_]*/
# A double-quoted terminal with at least one character between the quotes.
# `.+` is greedy, so the match extends to the last `"` on the line.
# Not referenced by the code visible in this file.
TERMINAL = /".+"/
# Everything on one line up to and including its last "." (`.` does not match
# a newline, so each match stays within a single line).
RULES = /.*\./
# Splits one rule: capture 1 is the optional name written before "=" (nil when
# absent), capture 2 is the rule body through its final ".". A trailing newline
# is consumed but not captured.
RULE = /(?:([a-zA-Z][a-zA-Z_]*)\s*=)?\s*(.+\.)\n?/
# Maps operator and bracket lexemes to symbolic names. token_codegen looks a
# lexeme up here and, when it is present, writes the mapped name into the
# generated `token->class == ...` comparison instead of the raw lexeme.
TOKENS = {
# Brackets
"(" => "LPAR",
")" => "RPAR",
"{" => "LCUR",
"}" => "RCUR",
"[" => "LBRA",
"]" => "RBRA",
# Assignment operators
"=" => "ASSIGN",
"+=" => "ADD_ASSIGN",
"-=" => "SUB_ASSIGN",
"*=" => "MULT_ASSIGN",
"/=" => "DIV_ASSIGN",
"**=" => "PWR_ASSIGN",
"%=" => "MOD_ASSIGN",
# Comparison operators
"<" => "LT",
">" => "GT",
"<=" => "LE",
">=" => "GE",
"!=" => "NE",
"==" => "EQ",
# Arithmetic operators and range
"*" => "MULT",
"+" => "ADD",
"-" => "SUB",
"/" => "DIV",
"%" => "MOD",
"**" => "PWR",
".." => "RANGE"
}
# C source skeleton for one generated recognizer function.
#
# `%nonterminal%` is replaced with the rule name and `%transitions%` with the
# generated `case` bodies. `current_state` is declared before the loop: each
# `case` body assigns the next state and then `break`s out of the switch, so
# the variable has to survive into the next loop iteration. Declaring it inside
# the loop would reset it to 0 every time around.
TEMPLATE = <<-CODE
void %nonterminal%()
{
int current_state = 0;
while (true) {
switch (current_state) {
%transitions%
default:
fatal_error("unexpected state");
break;
}
}
}
CODE
# Builds the C statements for a transition taken by invoking a sub-machine:
# a call to the function named by +input+, then the move to state +to+.
#
# @param input [#to_s] name interpolated as the C function to call
# @param to [#to_s] value assigned to `current_state` after the call
# @return [String] two lines of C, each terminated by a newline
def submachine_codegen(input, to)
  call_line = "#{input}(); /* CALL SUB MACHINE */"
  advance_line = "current_state = #{to};"
  [call_line, advance_line].map { |line| "#{line}\n" }.join
end
# Builds the C `if` that guards a transition taken on a terminal.
#
# "ANY_CHAR" produces a range test on `ch` (from ' ' to '~'). Any other input
# produces a `token->class == X` test, where X is the TOKENS entry for the
# input when one exists, a quoted C string literal when the input is a lone
# `"` or `'`, and the input itself otherwise.
#
# @param input [String] terminal text (callers in this file pass it upcased)
# @param to [#to_s] value assigned to `current_state` when the test succeeds
# @return [String] the `if` line followed by the state assignment line
def token_codegen(input, to)
  advance = "current_state = #{to};\n"
  return "if (' ' <= ch && ch <= '~')\n" + advance if input.eql?("ANY_CHAR")

  symbol = TOKENS.has_key?(input) ? TOKENS[input] : input
  operand =
    if symbol.eql?('"')
      '"\""'
    elsif symbol.eql?("'")
      %q["'"]
    else
      symbol
    end
  "if (token->class == #{operand})\n" + advance
end
# Grammar source for the generator, written one rule per line in the form
# `name = expression.`; the text is handed to Grammar::Wirth rule by rule.
# This is runtime data: the heredoc body must not be reformatted.
lexer = <<-LEXER
string = """ { "any_char" } """ | "'" { "any_char" } "'".
op_assign = "=" | "+=" | "/=" | "-=" | "%=" | "**=" .
op_expr = "<" | ">" | ">=" | "<=" | "!=" | "==" | "+" | "-" | "/" | "*" | "%" | "**" | "..".
integer = digit { digit }.
float = digit { digit } "." digit { digit }.
operation = identifier [ "!" | "?" ].
identifier = ( "_" | letter | letter ) { "_" | letter | digit }.
number = digit { digit }.
LEXER
# Generates lexer.c: one C recognizer function per grammar rule in `lexer`.
#
# Each rule body is handed to Grammar::Wirth, the resulting minimized DFA is
# walked state by state, and every state becomes one `case` inside TEMPLATE's
# switch. The block form of File.open closes the file even if generation
# raises part-way through.
File.open("lexer.c", "w") do |file|
  lexer.scan(RULES).each do |rule_text|
    match = RULE.match(rule_text)
    name, decl = match.captures if match
    # RULE's name group is optional; without a name there is no C function to
    # emit, so fail with a message that points at the offending rule.
    raise ArgumentError, "cannot generate code for unnamed rule: #{rule_text.inspect}" if name.nil?

    dfa = Grammar::Wirth.new(decl).minimized_dfa

    # Start from an empty buffer for every rule. Sharing one buffer across
    # rules would copy the `case` bodies of all earlier rules into each later
    # function.
    output = ""

    dfa[:states].each do |state|
      output << " case #{state}:\n"

      # NOTE(review): a missing entry is treated as "no outgoing transitions";
      # confirm whether Grammar::Wirth can return nil for a state here.
      transitions = dfa[:transitions][state] || []
      moves = transitions.map do |input, to|
        # A quoted input is a terminal; anything else is treated as the name
        # of another machine to call.
        quoted = /"(.+)"/.match(input)
        if quoted
          token_codegen(quoted.captures.first.upcase, to)
        else
          submachine_codegen(input, to)
        end
      end

      # Read a character only in states that actually test something.
      output << " ch = readc(in);\n" unless moves.empty?
      output << moves.join(' else ')
      output << " else\nfatal_error(\"Syntax Error\");\n" if moves.size > 1
      output << " return; /* ACCEPT */\n" if dfa[:final].include?(state)
      output << " break;\n"
    end

    # Block-form sub inserts the replacement verbatim; the string form would
    # interpret backslash sequences found in the generated C.
    function_source = TEMPLATE.sub('%nonterminal%') { name }.sub('%transitions%') { output }
    # Joining moves with ' else ' leaves a newline between `else` and `if`;
    # collapse it so the chain reads `else if`.
    file << function_source.gsub(/else\s+if/, "else if")
  end
end
# (GitHub page footer, not part of the script: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")