Created
December 1, 2012 17:05
-
-
Save hashmal/4183261 to your computer and use it in GitHub Desktop.
Maize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'ap' | |
# Runtime library appended verbatim to every compiled program.
# Calling convention: %rdi is the operand-stack pointer; the stack
# grows downward in 8-byte words (see Nodes::Literal#asm).
#   rem - drop the top word
#   dup - duplicate the top word
#   sum - pop two words, push their sum
#   gt  - pop two words, push their difference
#
# FIX: gt previously ended with "mov %rax, %rdi" and no "ret" -- that
# clobbered the operand-stack pointer and fell off the end of the
# routine into whatever followed. It now stores the result through
# (%rdi) and returns, mirroring sum.
RUNTIME = "
rem:
 add $8, %rdi
 ret
dup:
 mov (%rdi), %rax
 sub $8, %rdi
 mov %rax, (%rdi)
 ret
sum:
 mov (%rdi), %rax
 add $8, %rdi
 add (%rdi), %rax
 mov %rax, (%rdi)
 ret
gt:
 mov (%rdi), %rax
 add $8, %rdi
 sub (%rdi), %rax
 mov %rax, (%rdi)
 ret
"
# The lexer extracts tokens from a string.
class Lexer
  attr_reader :source

  # A single lexeme: a symbolic type plus the range of source
  # characters (location) it covers; value is the matched text.
  class Token
    attr_reader :type, :value, :location

    def initialize(type, location, source)
      @type = type
      @location = location
      @value = source[@location]
    end

    def to_s
      "<Token :#{@type}, '#{@value}'>"
    end
  end

  def initialize(str)
    @source = str
    @cursor = 0
  end

  # Consume and return the next Token, or nil at end of input.
  # Raises if no rule recognizes the current character.
  def get_token
    whitespace or comment or token_program or token_is or token_result or
    token_end or token_then or token_else or token_label or token_goto or
    token_identifier or token_literal or error
  end

  # Peek at the next token without consuming it.
  def look_token
    saved = @cursor
    result = get_token
    @cursor = saved
    result
  end

  # Consume and discard the next token.
  def next
    get_token
    nil
  end

  private

  # Skip a run of whitespace, then keep scanning.
  # Matching the whole run (\s+) replaces the old one-character-per-
  # recursion loop; the old class [ \s\n\t] was redundant since \s
  # already covers space, newline and tab.
  def whitespace
    if current_string =~ /\A\s+/
      @cursor += Regexp.last_match(0).length
      get_token
    end
  end

  # Skip a "--" line comment (to end of line), then keep scanning.
  # The old code built a throwaway string with gsub purely for the $1
  # side effect; a plain match yields the same length without the copy.
  def comment
    if current_string =~ /\A--[^\n]*\n?/
      @cursor += Regexp.last_match(0).length
      get_token
    end
  end

  # Match a fixed keyword or operator of known length: on success,
  # advance the cursor past it and build a Token covering those
  # characters. Factors out the eight formerly copy-pasted methods.
  def fixed_token(type, pattern, length)
    if current_string =~ pattern
      Token.new type, @cursor..(@cursor += length) - 1, @source
    end
  end

  def token_program
    fixed_token :PROGRAM, /\Aprogram\b/, 7
  end

  def token_is
    fixed_token :IS, /\Ais\b/, 2
  end

  def token_result
    fixed_token :"->", /\A->/, 2
  end

  def token_end
    fixed_token :END, /\Aend\b/, 3
  end

  def token_then
    fixed_token :THEN, /\Athen\b/, 4
  end

  def token_else
    fixed_token :ELSE, /\Aelse\b/, 4
  end

  def token_label
    fixed_token :LABEL, /\Alabel\b/, 5
  end

  def token_goto
    fixed_token :GOTO, /\Agoto\b/, 4
  end

  # Identifier: letters and digits with single underscores between runs.
  def token_identifier
    if current_string =~ /\A([A-Za-z](?:_?(?:[A-Za-z]|[0-9]))*)\b/
      Token.new :IDENTIFIER, @cursor..(@cursor += $1.length) - 1, @source
    end
  end

  # Hexadecimal literal, e.g. 0x2A.
  def token_literal
    if current_string =~ /\A(0x[0-9a-fA-F]+)\b/
      Token.new :LITERAL, @cursor..(@cursor += $1.length) - 1, @source
    end
  end

  # Any unmatched character mid-input is a lexical error; at end of
  # input this returns nil so get_token reports exhaustion instead.
  def error
    if current_string.length > 0
      raise "Invalid character at #{@cursor}: '#{@source[@cursor]}'"
    end
  end

  # The not-yet-consumed tail of the source.
  def current_string
    @source.slice(@cursor..-1)
  end
end
module Nodes | |
class Literal | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Literal: #{@token.value})" | |
end | |
def ap | |
{'Literal' => @token.value} | |
end | |
def asm | |
" sub $#{size}, %rdi\n" + | |
" movq $#{@token.value}, (%rdi)\n" | |
end | |
private | |
def size | |
8 | |
end | |
end | |
class Operation | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Operation: #{@token.value})" | |
end | |
def ap | |
{'Operation' => @token.value} | |
end | |
def asm | |
# " ; ---\n" + | |
" call #{@token.value}\n" | |
end | |
end | |
class Goto | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Goto: #{@token.value})" | |
end | |
def ap | |
{'Goto' => @token.value} | |
end | |
def asm | |
" jmp #{@token.value}\n" | |
end | |
end | |
class Label | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Label: #{@token.value})" | |
end | |
def ap | |
{'Label' => @token.value} | |
end | |
def asm | |
"#{@token.value}:\n" | |
end | |
end | |
class Branch | |
def initialize yes, no | |
@yes = [yes].flatten | |
@no = [no].flatten | |
end | |
def to_s | |
"(Branch)" | |
end | |
def ap | |
if @no.length > 0 | |
{ | |
'then' => @yes.map{|o|o.ap}, | |
'else' => @no.map{|o|o.ap} | |
} | |
else | |
{'then' => @yes.map{|o|o.ap}} | |
end | |
end | |
def asm | |
# " ; branch\n" + | |
" mov -8(%rdi), %rax\n" + | |
" cmp %rax, (%rdi)\n" + | |
" je branch1\n" + | |
" add $8, (%rdi)\n" + | |
@no.map{|o|o.asm}.join + | |
" jmp branch1__end\n" + | |
"branch1:\n" + | |
" add $8, (%rdi)\n" + | |
@yes.map{|o|o.asm}.join + | |
"branch1__end:\n" | |
end | |
end | |
class OperationDecl | |
def initialize name, args, ret, body | |
@name_token = name | |
@arg_type_tokens = args | |
@return_type_token = ret | |
@body = [body].flatten | |
end | |
def to_s | |
"(OperationDecl)" | |
end | |
def ap | |
v = { | |
'OperationDecl' => | |
{ | |
'name' => @name_token.value, | |
'body' => @body.map{|b|b.ap} | |
} | |
} | |
v['OperationDecl']['return_type'] = @return_type_token.value if @return_type_token | |
v['OperationDecl']['argument_types'] = @arg_type_tokens.map{|a|a.value} if @arg_type_tokens.length > 0 | |
return v | |
end | |
def asm | |
if @name_token.value == "main" | |
"_main:\n" + | |
" mov %rsp, %rdi\n" + | |
" sub $65536, %rsp\n" + | |
@body.map{|b|b.asm}.join + | |
" mov %rdi, %rax\n" + | |
" add $65536, %rax\n" + | |
# " cmp %rax, %rsp\n" + | |
# " je _empty\n" + | |
" mov (%rdi), %rax\n" + | |
# " jmp _end\n" + | |
# "_empty:\n" + | |
# " xor %rax, %rax\n" + | |
# "_end:\n" + | |
" add $65536, %rsp\n" + | |
" ret\n" + | |
"\n" | |
else | |
"#{@name_token.value}:\n" + | |
# " ; ---\n" + | |
@body.map{|b|b.asm}.join + | |
" ret\n" + | |
"\n" | |
end | |
end | |
end | |
class TranslationUnit | |
def initialize op_decls | |
@declarations = op_decls | |
end | |
def to_s | |
"(TranslationUnit)" | |
end | |
def ap | |
{'TranslationUnit' => @declarations.map{|d|d.ap}} | |
end | |
def asm | |
# " .intel_syntax\n" + | |
" .globl _main\n" + | |
"\n" + | |
@declarations.map{|d|d.asm}.join + | |
"\n# RUNTIME LIBRARY #\n" + | |
RUNTIME | |
end | |
end | |
end | |
# The parser builds a parse tree by analyzing tokens.
#
# It is a table-driven predictive (LL) parser: A..J are selector
# procs mapping a lookahead token type to a rule number in RULES;
# parse! drives an explicit stack whose entries are terminals
# (Symbols), nonterminals (Procs), and pending reductions (the rule
# Hashes themselves).
class Parser
  A = ->(token) { token == :GOTO ? 1 : nil }  # goto statement
  B = ->(token) { token == :LABEL ? 2 : nil } # label statement
  # Any single statement.
  C = ->(token) { { :THEN => 5, :LABEL => 3, :GOTO => 4, :IDENTIFIER => 6, :LITERAL => 7 }[token] }
  D = ->(token) { token == :THEN ? 8 : nil }  # branch
  # Statement list; rule 10 is the epsilon production.
  E = ->(token) { [:THEN, :LABEL, :GOTO, :IDENTIFIER, :LITERAL].include?(token) ? 9 : 10 }
  F = ->(token) { { :END => 11, :ELSE => 12 }[token] } # branch tail
  G = ->(token) { token == :PROGRAM ? 13 : nil }       # one declaration
  H = ->(token) { token == :IDENTIFIER ? 14 : 15 }     # identifier list
  I = ->(token) { token == :"->" ? 16 : 17 }           # optional return type
  # Declaration list. FIX: the old second test (token == /\Z/ ? 19)
  # could never be true -- token is always a Symbol, never a Regexp;
  # end of input is detected by look_token returning nil in
  # #nonterminal, so the dead comparison is dropped.
  J = ->(token) { token == :PROGRAM ? 18 : nil }

  RULES = {
    # goto IDENTIFIER
    1 => { :production => [:GOTO, :IDENTIFIER],
           :action => ->(tokens) { Nodes::Goto.new tokens[-1] } },
    # label IDENTIFIER
    2 => { :production => [:LABEL, :IDENTIFIER],
           :action => ->(tokens) { Nodes::Label.new tokens[-1] } },
    3 => { :production => [B], :action => ->(tokens) { tokens } },
    4 => { :production => [A], :action => ->(tokens) { tokens } },
    5 => { :production => [D], :action => ->(tokens) { tokens } },
    # Bare identifier: a call to that operation.
    6 => { :production => [:IDENTIFIER],
           :action => ->(tokens) { Nodes::Operation.new tokens[0] } },
    7 => { :production => [:LITERAL],
           :action => ->(tokens) { Nodes::Literal.new tokens[0] } },
    # then <stmts> [else <stmts>] end
    8 => { :production => [:THEN, E, F],
           :action => ->(tokens) { Nodes::Branch.new tokens[1], (tokens[2] || []) } },
    # Statement list: cons one statement onto the rest.
    9 => { :production => [C, E],
           :action => ->(tokens) { tokens[-1] ? [tokens].flatten : tokens[0] } },
    10 => { :production => [] },
    11 => { :production => [:END], :action => ->(tokens) { nil } },
    12 => { :production => [:ELSE, E, :END], :action => ->(tokens) { tokens[1] } },
    # program NAME... [-> TYPE] is <stmts> end
    13 => { :production => [:PROGRAM, H, I, :IS, E, :END],
            :action => ->(tokens) {
              names = [tokens[1]].flatten
              Nodes::OperationDecl.new names[-1], names[0..-2], tokens[2], (tokens[4] || [])
            } },
    14 => { :production => [:IDENTIFIER, H],
            :action => ->(tokens) { tokens[-1] ? tokens : tokens[0] } },
    15 => { :production => [] },
    16 => { :production => [:"->", :IDENTIFIER], :action => ->(tokens) { tokens[1] } },
    17 => { :production => [] },
    18 => { :production => [G, J], :action => ->(tokens) { [tokens].flatten } },
    19 => { :production => [] } # historical end-of-input rule; unreachable
  }

  def initialize(source)
    @source = source
    @lexer = Lexer.new @source
    # Bottom sentinel (ignored by the dispatch loop) plus the start
    # symbol J.
    @stack = [/\Z/, J]
    @out = []
  end

  # Drive the parse to completion; the single remaining value on
  # @out is the declaration list wrapped into a TranslationUnit.
  def parse!
    while @stack.length > 0
      item = @stack.pop
      case item
      when Symbol then terminal item
      when Proc   then nonterminal item
      when Hash   then block_termination item
      end
    end
    Nodes::TranslationUnit.new @out[0]
  end

  # Match a terminal: the next token's type must equal the expected
  # symbol.
  def terminal(item)
    token = @lexer.get_token
    # FIX: running out of input here used to die with NoMethodError.
    raise "expected #{item} but input ended" unless token
    @out << token
    raise "symbol #{item} doesn't match #{token}" unless item == token.type
  end
  # Preserve the original (misspelled) public name for any callers.
  alias_method :termial, :terminal

  # Expand a nonterminal: consult the selector with the lookahead,
  # push the chosen rule Hash (as its own reduction marker) followed
  # by its production reversed so the leftmost symbol pops first.
  # Epsilon productions still push a nil slot so actions index
  # reliably into their popped block.
  def nonterminal(item)
    token = @lexer.look_token
    return unless token
    rule = RULES[item.call(token.type)]
    # FIX: a syntax error here used to crash with NoMethodError on nil.
    raise "unexpected token #{token}" unless rule
    prod = rule[:production]
    @out << nil if prod == []
    @stack.push(rule).concat(prod.reverse)
  end

  # Reduce: pop the production's semantic values off @out and push
  # the result of the rule's action.
  def block_termination(arr)
    count = arr[:production].length
    if count > 0
      block = @out.pop(count)
      @out << arr[:action].call(block)
    end
  end
end
# Example usage (kept from the original, for reference):
#   data = Parser.new "
#   program main -> X is
#     0x00
#   end
#   "
#   x = data.parse!
#   ap x.ap, :index => false, :indent => 2
#   puts x.asm
#
# Entry point: compile the Maize source file named on the command
# line and print the generated assembly on stdout.
abort "usage: #{$PROGRAM_NAME} SOURCE_FILE" if ARGV.empty?
# File.read closes its handle; the old File.open(ARGV[0]).read leaked it.
ast = Parser.new(File.read(ARGV[0])).parse!
puts ast.asm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment