Skip to content

Instantly share code, notes, and snippets.

@hashmal
Created December 1, 2012 17:05
Show Gist options
  • Save hashmal/4183261 to your computer and use it in GitHub Desktop.
Save hashmal/4183261 to your computer and use it in GitHub Desktop.
Maize
#!/usr/bin/env ruby
require 'ap'
RUNTIME = "
rem:
add $8, %rdi
ret
dup:
mov (%rdi), %rax
sub $8, %rdi
mov %rax, (%rdi)
ret
sum:
mov (%rdi), %rax
add $8, %rdi
add (%rdi), %rax
mov %rax, (%rdi)
ret
gt:
mov (%rdi), %rax
add $8, %rdi
sub (%rdi), %rax
mov %rax, %rdi
"
# The lexer extracts tokens from a string.
class Lexer
attr_reader :source
# Obviously, this is a token...
class Token
attr_reader :type, :value, :location
def initialize type, location, source
@type = type
@location = location
@value = source[@location]
end
def to_s
"<Token :#{@type}, '#{@value}'>"
end
end
def initialize str
@source = str
@cursor = 0
end
def get_token
whitespace or comment or token_program or token_is or token_result or
token_end or token_then or token_else or token_label or token_goto or
token_identifier or token_literal or error
end
def look_token
saved = @cursor
result = get_token
@cursor = saved
return result
end
def next
get_token
nil
end
private
def whitespace
if current_string =~ /\A([ \s\n\t])/
@cursor += $1.length
return get_token
end
end
def comment
str = current_string
if str[0..1] == "--"
str.gsub(/\A([^\n]*\n?)/, '')
@cursor += $1.length
return get_token
end
end
def token_program
if current_string =~ /\Aprogram\b/
Token.new :PROGRAM, @cursor .. (@cursor+=7)-1, @source
# return [:PROGRAM, @cursor .. (@cursor+=7)-1]
end
end
def token_is
if current_string =~ /\A(is)\b/
Token.new :IS, @cursor .. (@cursor+=2)-1, @source
# return [:IS, @cursor .. (@cursor+=2)-1]
end
end
def token_result
if current_string =~ /\A(->)/
Token.new :"->", @cursor .. (@cursor+=2)-1, @source
# return [:"->", @cursor .. (@cursor+=2)-1]
end
end
def token_end
if current_string =~ /\Aend\b/
Token.new :END, @cursor .. (@cursor+=3)-1, @source
# return [:END, @cursor .. (@cursor+=3)-1]
end
end
def token_then
if current_string =~ /\Athen\b/
Token.new :THEN, @cursor .. (@cursor+=4)-1, @source
# return [:THEN, @cursor .. (@cursor+=4)-1]
end
end
def token_else
if current_string =~ /\Aelse\b/
Token.new :ELSE, @cursor .. (@cursor+=4)-1, @source
# return [:ELSE, @cursor .. (@cursor+=4)-1]
end
end
def token_label
if current_string =~ /\Alabel\b/
Token.new :LABEL, @cursor .. (@cursor+=5)-1, @source
# return [:LABEL, @cursor .. (@cursor+=5)-1]
end
end
def token_goto
if current_string =~ /\Agoto\b/
Token.new :GOTO, @cursor .. (@cursor+=4)-1, @source
# return [:GOTO, @cursor .. (@cursor+=4)-1]
end
end
def token_identifier
if current_string =~ /\A([A-Za-z](?:_?(?:[A-Za-z]|[0-9]))*)\b/
Token.new :IDENTIFIER, @cursor .. (@cursor+=$1.length)-1, @source
# return [:IDENTIFIER, @cursor .. (@cursor+=$1.length)-1]
end
end
def token_literal
if current_string =~ /\A(0x[0-9a-fA-F]+)\b/
Token.new :LITERAL, @cursor .. (@cursor+=$1.length)-1, @source
# return [:LITERAL, @cursor .. (@cursor+=$1.length)-1]
end
end
def error
if current_string.length > 0
raise "Invalid character at #{@cursor}: '#{@source[@cursor]}'"
end
end
def current_string
str = @source.slice(@cursor .. -1)
end
end
module Nodes
class Literal
def initialize token
@token = token
end
def to_s
"(Literal: #{@token.value})"
end
def ap
{'Literal' => @token.value}
end
def asm
" sub $#{size}, %rdi\n" +
" movq $#{@token.value}, (%rdi)\n"
end
private
def size
8
end
end
class Operation
def initialize token
@token = token
end
def to_s
"(Operation: #{@token.value})"
end
def ap
{'Operation' => @token.value}
end
def asm
# " ; ---\n" +
" call #{@token.value}\n"
end
end
class Goto
def initialize token
@token = token
end
def to_s
"(Goto: #{@token.value})"
end
def ap
{'Goto' => @token.value}
end
def asm
" jmp #{@token.value}\n"
end
end
class Label
def initialize token
@token = token
end
def to_s
"(Label: #{@token.value})"
end
def ap
{'Label' => @token.value}
end
def asm
"#{@token.value}:\n"
end
end
class Branch
def initialize yes, no
@yes = [yes].flatten
@no = [no].flatten
end
def to_s
"(Branch)"
end
def ap
if @no.length > 0
{
'then' => @yes.map{|o|o.ap},
'else' => @no.map{|o|o.ap}
}
else
{'then' => @yes.map{|o|o.ap}}
end
end
def asm
# " ; branch\n" +
" mov -8(%rdi), %rax\n" +
" cmp %rax, (%rdi)\n" +
" je branch1\n" +
" add $8, (%rdi)\n" +
@no.map{|o|o.asm}.join +
" jmp branch1__end\n" +
"branch1:\n" +
" add $8, (%rdi)\n" +
@yes.map{|o|o.asm}.join +
"branch1__end:\n"
end
end
class OperationDecl
def initialize name, args, ret, body
@name_token = name
@arg_type_tokens = args
@return_type_token = ret
@body = [body].flatten
end
def to_s
"(OperationDecl)"
end
def ap
v = {
'OperationDecl' =>
{
'name' => @name_token.value,
'body' => @body.map{|b|b.ap}
}
}
v['OperationDecl']['return_type'] = @return_type_token.value if @return_type_token
v['OperationDecl']['argument_types'] = @arg_type_tokens.map{|a|a.value} if @arg_type_tokens.length > 0
return v
end
def asm
if @name_token.value == "main"
"_main:\n" +
" mov %rsp, %rdi\n" +
" sub $65536, %rsp\n" +
@body.map{|b|b.asm}.join +
" mov %rdi, %rax\n" +
" add $65536, %rax\n" +
# " cmp %rax, %rsp\n" +
# " je _empty\n" +
" mov (%rdi), %rax\n" +
# " jmp _end\n" +
# "_empty:\n" +
# " xor %rax, %rax\n" +
# "_end:\n" +
" add $65536, %rsp\n" +
" ret\n" +
"\n"
else
"#{@name_token.value}:\n" +
# " ; ---\n" +
@body.map{|b|b.asm}.join +
" ret\n" +
"\n"
end
end
end
class TranslationUnit
def initialize op_decls
@declarations = op_decls
end
def to_s
"(TranslationUnit)"
end
def ap
{'TranslationUnit' => @declarations.map{|d|d.ap}}
end
def asm
# " .intel_syntax\n" +
" .globl _main\n" +
"\n" +
@declarations.map{|d|d.asm}.join +
"\n# RUNTIME LIBRARY #\n" +
RUNTIME
end
end
end
# The parser build a parse tree by analyzing tokens.
class Parser
A = Proc.new { |token| token == :GOTO ? 1 : nil }
B = Proc.new { |token| token == :LABEL ? 2 : nil }
C = Proc.new { |token| {:THEN => 5, :LABEL => 3, :GOTO => 4, :IDENTIFIER => 6, :LITERAL => 7}[token] }
D = Proc.new { |token| token == :THEN ? 8 : nil }
E = Proc.new { |token| [:THEN, :LABEL, :GOTO, :IDENTIFIER, :LITERAL].include?(token) ? 9 : 10 }
F = Proc.new { |token| {:END => 11, :ELSE => 12}[token] }
G = Proc.new { |token| token == :PROGRAM ? 13 : nil }
H = Proc.new { |token| token == :IDENTIFIER ? 14 : 15 }
I = Proc.new { |token| token == :"->" ? 16 : 17 }
J = Proc.new { |token| token == :PROGRAM ? 18 : token == /\Z/ ? 19 : nil }
RULES = {
1 => {:production => [:GOTO, :IDENTIFIER],
:action => Proc.new { |tokens|
Nodes::Goto.new tokens[-1]
}},
2 => {:production => [:LABEL, :IDENTIFIER],
:action => Proc.new { |tokens|
Nodes::Label.new tokens[-1]
}},
3 => {:production => [B],
:action => Proc.new { |tokens|
tokens
}},
4 => {:production => [A],
:action => Proc.new { |tokens|
tokens
}},
5 => {:production => [D],
:action => Proc.new { |tokens|
tokens
}},
6 => {:production => [:IDENTIFIER],
:action => Proc.new { |tokens|
Nodes::Operation.new tokens[0]
}},
7 => {:production => [:LITERAL],
:action => Proc.new { |tokens|
Nodes::Literal.new tokens[0]
}},
8 => {:production => [:THEN, E, F],
:action => Proc.new { |tokens|
Nodes::Branch.new tokens[1], (tokens[2] || [])
}},
9 => {:production => [C, E],
:action => Proc.new { |tokens|
if tokens[-1]
[tokens].flatten
else
tokens[0]
end
}},
10 => {:production => []},
11 => {:production => [:END],
:action => Proc.new { |tokens|
nil
}},
12 => {:production => [:ELSE, E, :END],
:action => Proc.new { |tokens|
tokens[1]
}},
13 => {:production => [:PROGRAM, H, I, :IS, E, :END],
:action => Proc.new { |tokens|
argname = [tokens[1]].flatten
Nodes::OperationDecl.new argname[-1], argname[0 .. -2], tokens[2], (tokens[4] || [])
}},
14 => {:production => [:IDENTIFIER, H],
:action => Proc.new { |tokens|
if tokens[-1]
tokens
else
tokens[0]
end
}},
15 => {:production => []},
16 => {:production => [:"->", :IDENTIFIER],
:action => Proc.new { |tokens|
tokens[1]
}},
17 => {:production => []},
18 => {:production => [G, J],
:action => Proc.new { |tokens|
[tokens].flatten
}},
19 => {:production => []}
}
def initialize source
@source = source
@lexer = Lexer.new @source
@stack = [/\Z/, J]
@out = []
end
def parse!
while @stack.length > 0 do
item = @stack.pop
case item
when Symbol
termial item
when Proc
nonterminal item
when Hash
block_termination item
end
end
return Nodes::TranslationUnit.new @out[0]
end
def termial item
token = @lexer.get_token
@out << token
raise "symbol #{item} doesn't match #{token}" unless item == token.type
end
def nonterminal item
token = @lexer.look_token
if token
rule = RULES[item.call(token.type)]
prod = rule[:production]
if prod == []
@out << nil
end
@stack.push(rule).concat(prod.reverse)
end
end
def block_termination arr
count = arr[:production].length
if count > 0
block = @out.pop(count)
@out << arr[:action].call(block)
end
end
end
# data = Parser.new "
# program main -> X is
# 0x00
# end
# "
# x = data.parse!
# ap x.ap, :index => false, :indent => 2
# require 'yaml'
# puts x.ap.to_yaml
# puts
# puts x.asm
ast = Parser.new(File.open(ARGV[0]).read).parse!
puts ast.asm
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment