Created
December 1, 2012 17:05
-
-
Save hashmal/4183261 to your computer and use it in GitHub Desktop.
Maize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'ap' | |
# Runtime library appended verbatim to every compiled program.
# Calling convention: %rdi is the operand-stack pointer; the stack
# grows downward in 8-byte words (see Nodes::Literal#asm).
#   rem - drop the top word
#   dup - duplicate the top word
#   sum - pop two words, push their sum
#   gt  - pop two words, push their difference
#
# FIX: gt previously ended with "mov %rax, %rdi" and no "ret" -- that
# clobbered the operand-stack pointer and fell off the end of the
# routine into whatever followed. It now stores the result through
# (%rdi) and returns, mirroring sum.
RUNTIME = "
rem:
 add $8, %rdi
 ret
dup:
 mov (%rdi), %rax
 sub $8, %rdi
 mov %rax, (%rdi)
 ret
sum:
 mov (%rdi), %rax
 add $8, %rdi
 add (%rdi), %rax
 mov %rax, (%rdi)
 ret
gt:
 mov (%rdi), %rax
 add $8, %rdi
 sub (%rdi), %rax
 mov %rax, (%rdi)
 ret
"
# The lexer extracts tokens from a string.
class Lexer
  attr_reader :source

  # A single lexeme: a symbolic type plus the range of source
  # characters (location) it covers; value is the matched text.
  class Token
    attr_reader :type, :value, :location

    def initialize(type, location, source)
      @type = type
      @location = location
      @value = source[@location]
    end

    def to_s
      "<Token :#{@type}, '#{@value}'>"
    end
  end

  def initialize(str)
    @source = str
    @cursor = 0
  end

  # Consume and return the next Token, or nil at end of input.
  # Raises if no rule recognizes the current character.
  def get_token
    whitespace or comment or token_program or token_is or token_result or
    token_end or token_then or token_else or token_label or token_goto or
    token_identifier or token_literal or error
  end

  # Peek at the next token without consuming it.
  def look_token
    saved = @cursor
    result = get_token
    @cursor = saved
    result
  end

  # Consume and discard the next token.
  def next
    get_token
    nil
  end

  private

  # Skip a run of whitespace, then keep scanning.
  # Matching the whole run (\s+) replaces the old one-character-per-
  # recursion loop; the old class [ \s\n\t] was redundant since \s
  # already covers space, newline and tab.
  def whitespace
    if current_string =~ /\A\s+/
      @cursor += Regexp.last_match(0).length
      get_token
    end
  end

  # Skip a "--" line comment (to end of line), then keep scanning.
  # The old code built a throwaway string with gsub purely for the $1
  # side effect; a plain match yields the same length without the copy.
  def comment
    if current_string =~ /\A--[^\n]*\n?/
      @cursor += Regexp.last_match(0).length
      get_token
    end
  end

  # Match a fixed keyword or operator of known length: on success,
  # advance the cursor past it and build a Token covering those
  # characters. Factors out the eight formerly copy-pasted methods.
  def fixed_token(type, pattern, length)
    if current_string =~ pattern
      Token.new type, @cursor..(@cursor += length) - 1, @source
    end
  end

  def token_program
    fixed_token :PROGRAM, /\Aprogram\b/, 7
  end

  def token_is
    fixed_token :IS, /\Ais\b/, 2
  end

  def token_result
    fixed_token :"->", /\A->/, 2
  end

  def token_end
    fixed_token :END, /\Aend\b/, 3
  end

  def token_then
    fixed_token :THEN, /\Athen\b/, 4
  end

  def token_else
    fixed_token :ELSE, /\Aelse\b/, 4
  end

  def token_label
    fixed_token :LABEL, /\Alabel\b/, 5
  end

  def token_goto
    fixed_token :GOTO, /\Agoto\b/, 4
  end

  # Identifier: letters and digits with single underscores between runs.
  def token_identifier
    if current_string =~ /\A([A-Za-z](?:_?(?:[A-Za-z]|[0-9]))*)\b/
      Token.new :IDENTIFIER, @cursor..(@cursor += $1.length) - 1, @source
    end
  end

  # Hexadecimal literal, e.g. 0x2A.
  def token_literal
    if current_string =~ /\A(0x[0-9a-fA-F]+)\b/
      Token.new :LITERAL, @cursor..(@cursor += $1.length) - 1, @source
    end
  end

  # Any unmatched character mid-input is a lexical error; at end of
  # input this returns nil so get_token reports exhaustion instead.
  def error
    if current_string.length > 0
      raise "Invalid character at #{@cursor}: '#{@source[@cursor]}'"
    end
  end

  # The not-yet-consumed tail of the source.
  def current_string
    @source.slice(@cursor..-1)
  end
end
module Nodes | |
class Literal | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Literal: #{@token.value})" | |
end | |
def ap | |
{'Literal' => @token.value} | |
end | |
def asm | |
" sub $#{size}, %rdi\n" + | |
" movq $#{@token.value}, (%rdi)\n" | |
end | |
private | |
def size | |
8 | |
end | |
end | |
class Operation | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Operation: #{@token.value})" | |
end | |
def ap | |
{'Operation' => @token.value} | |
end | |
def asm | |
# " ; ---\n" + | |
" call #{@token.value}\n" | |
end | |
end | |
class Goto | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Goto: #{@token.value})" | |
end | |
def ap | |
{'Goto' => @token.value} | |
end | |
def asm | |
" jmp #{@token.value}\n" | |
end | |
end | |
class Label | |
def initialize token | |
@token = token | |
end | |
def to_s | |
"(Label: #{@token.value})" | |
end | |
def ap | |
{'Label' => @token.value} | |
end | |
def asm | |
"#{@token.value}:\n" | |
end | |
end | |
class Branch | |
def initialize yes, no | |
@yes = [yes].flatten | |
@no = [no].flatten | |
end | |
def to_s | |
"(Branch)" | |
end | |
def ap | |
if @no.length > 0 | |
{ | |
'then' => @yes.map{|o|o.ap}, | |
'else' => @no.map{|o|o.ap} | |
} | |
else | |
{'then' => @yes.map{|o|o.ap}} | |
end | |
end | |
def asm | |
# " ; branch\n" + | |
" mov -8(%rdi), %rax\n" + | |
" cmp %rax, (%rdi)\n" + | |
" je branch1\n" + | |
" add $8, (%rdi)\n" + | |
@no.map{|o|o.asm}.join + | |
" jmp branch1__end\n" + | |
"branch1:\n" + | |
" add $8, (%rdi)\n" + | |
@yes.map{|o|o.asm}.join + | |
"branch1__end:\n" | |
end | |
end | |
class OperationDecl | |
def initialize name, args, ret, body | |
@name_token = name | |
@arg_type_tokens = args | |
@return_type_token = ret | |
@body = [body].flatten | |
end | |
def to_s | |
"(OperationDecl)" | |
end | |
def ap | |
v = { | |
'OperationDecl' => | |
{ | |
'name' => @name_token.value, | |
'body' => @body.map{|b|b.ap} | |
} | |
} | |
v['OperationDecl']['return_type'] = @return_type_token.value if @return_type_token | |
v['OperationDecl']['argument_types'] = @arg_type_tokens.map{|a|a.value} if @arg_type_tokens.length > 0 | |
return v | |
end | |
def asm | |
if @name_token.value == "main" | |
"_main:\n" + | |
" mov %rsp, %rdi\n" + | |
" sub $65536, %rsp\n" + | |
@body.map{|b|b.asm}.join + | |
" mov %rdi, %rax\n" + | |
" add $65536, %rax\n" + | |
# " cmp %rax, %rsp\n" + | |
# " je _empty\n" + | |
" mov (%rdi), %rax\n" + | |
# " jmp _end\n" + | |
# "_empty:\n" + | |
# " xor %rax, %rax\n" + | |
# "_end:\n" + | |
" add $65536, %rsp\n" + | |
" ret\n" + | |
"\n" | |
else | |
"#{@name_token.value}:\n" + | |
# " ; ---\n" + | |
@body.map{|b|b.asm}.join + | |
" ret\n" + | |
"\n" | |
end | |
end | |
end | |
class TranslationUnit | |
def initialize op_decls | |
@declarations = op_decls | |
end | |
def to_s | |
"(TranslationUnit)" | |
end | |
def ap | |
{'TranslationUnit' => @declarations.map{|d|d.ap}} | |
end | |
def asm | |
# " .intel_syntax\n" + | |
" .globl _main\n" + | |
"\n" + | |
@declarations.map{|d|d.asm}.join + | |
"\n# RUNTIME LIBRARY #\n" + | |
RUNTIME | |
end | |
end | |
end | |
# The parser builds a parse tree by analyzing tokens.
#
# It is a table-driven predictive (LL) parser: A..J are selector
# procs mapping a lookahead token type to a rule number in RULES;
# parse! drives an explicit stack whose entries are terminals
# (Symbols), nonterminals (Procs), and pending reductions (the rule
# Hashes themselves).
class Parser
  A = ->(token) { token == :GOTO ? 1 : nil }  # goto statement
  B = ->(token) { token == :LABEL ? 2 : nil } # label statement
  # Any single statement.
  C = ->(token) { { :THEN => 5, :LABEL => 3, :GOTO => 4, :IDENTIFIER => 6, :LITERAL => 7 }[token] }
  D = ->(token) { token == :THEN ? 8 : nil }  # branch
  # Statement list; rule 10 is the epsilon production.
  E = ->(token) { [:THEN, :LABEL, :GOTO, :IDENTIFIER, :LITERAL].include?(token) ? 9 : 10 }
  F = ->(token) { { :END => 11, :ELSE => 12 }[token] } # branch tail
  G = ->(token) { token == :PROGRAM ? 13 : nil }       # one declaration
  H = ->(token) { token == :IDENTIFIER ? 14 : 15 }     # identifier list
  I = ->(token) { token == :"->" ? 16 : 17 }           # optional return type
  # Declaration list. FIX: the old second test (token == /\Z/ ? 19)
  # could never be true -- token is always a Symbol, never a Regexp;
  # end of input is detected by look_token returning nil in
  # #nonterminal, so the dead comparison is dropped.
  J = ->(token) { token == :PROGRAM ? 18 : nil }

  RULES = {
    # goto IDENTIFIER
    1 => { :production => [:GOTO, :IDENTIFIER],
           :action => ->(tokens) { Nodes::Goto.new tokens[-1] } },
    # label IDENTIFIER
    2 => { :production => [:LABEL, :IDENTIFIER],
           :action => ->(tokens) { Nodes::Label.new tokens[-1] } },
    3 => { :production => [B], :action => ->(tokens) { tokens } },
    4 => { :production => [A], :action => ->(tokens) { tokens } },
    5 => { :production => [D], :action => ->(tokens) { tokens } },
    # Bare identifier: a call to that operation.
    6 => { :production => [:IDENTIFIER],
           :action => ->(tokens) { Nodes::Operation.new tokens[0] } },
    7 => { :production => [:LITERAL],
           :action => ->(tokens) { Nodes::Literal.new tokens[0] } },
    # then <stmts> [else <stmts>] end
    8 => { :production => [:THEN, E, F],
           :action => ->(tokens) { Nodes::Branch.new tokens[1], (tokens[2] || []) } },
    # Statement list: cons one statement onto the rest.
    9 => { :production => [C, E],
           :action => ->(tokens) { tokens[-1] ? [tokens].flatten : tokens[0] } },
    10 => { :production => [] },
    11 => { :production => [:END], :action => ->(tokens) { nil } },
    12 => { :production => [:ELSE, E, :END], :action => ->(tokens) { tokens[1] } },
    # program NAME... [-> TYPE] is <stmts> end
    13 => { :production => [:PROGRAM, H, I, :IS, E, :END],
            :action => ->(tokens) {
              names = [tokens[1]].flatten
              Nodes::OperationDecl.new names[-1], names[0..-2], tokens[2], (tokens[4] || [])
            } },
    14 => { :production => [:IDENTIFIER, H],
            :action => ->(tokens) { tokens[-1] ? tokens : tokens[0] } },
    15 => { :production => [] },
    16 => { :production => [:"->", :IDENTIFIER], :action => ->(tokens) { tokens[1] } },
    17 => { :production => [] },
    18 => { :production => [G, J], :action => ->(tokens) { [tokens].flatten } },
    19 => { :production => [] } # historical end-of-input rule; unreachable
  }

  def initialize(source)
    @source = source
    @lexer = Lexer.new @source
    # Bottom sentinel (ignored by the dispatch loop) plus the start
    # symbol J.
    @stack = [/\Z/, J]
    @out = []
  end

  # Drive the parse to completion; the single remaining value on
  # @out is the declaration list wrapped into a TranslationUnit.
  def parse!
    while @stack.length > 0
      item = @stack.pop
      case item
      when Symbol then terminal item
      when Proc   then nonterminal item
      when Hash   then block_termination item
      end
    end
    Nodes::TranslationUnit.new @out[0]
  end

  # Match a terminal: the next token's type must equal the expected
  # symbol.
  def terminal(item)
    token = @lexer.get_token
    # FIX: running out of input here used to die with NoMethodError.
    raise "expected #{item} but input ended" unless token
    @out << token
    raise "symbol #{item} doesn't match #{token}" unless item == token.type
  end
  # Preserve the original (misspelled) public name for any callers.
  alias_method :termial, :terminal

  # Expand a nonterminal: consult the selector with the lookahead,
  # push the chosen rule Hash (as its own reduction marker) followed
  # by its production reversed so the leftmost symbol pops first.
  # Epsilon productions still push a nil slot so actions index
  # reliably into their popped block.
  def nonterminal(item)
    token = @lexer.look_token
    return unless token
    rule = RULES[item.call(token.type)]
    # FIX: a syntax error here used to crash with NoMethodError on nil.
    raise "unexpected token #{token}" unless rule
    prod = rule[:production]
    @out << nil if prod == []
    @stack.push(rule).concat(prod.reverse)
  end

  # Reduce: pop the production's semantic values off @out and push
  # the result of the rule's action.
  def block_termination(arr)
    count = arr[:production].length
    if count > 0
      block = @out.pop(count)
      @out << arr[:action].call(block)
    end
  end
end
# Example usage (kept from the original, for reference):
#   data = Parser.new "
#   program main -> X is
#     0x00
#   end
#   "
#   x = data.parse!
#   ap x.ap, :index => false, :indent => 2
#   puts x.asm
#
# Entry point: compile the Maize source file named on the command
# line and print the generated assembly on stdout.
abort "usage: #{$PROGRAM_NAME} SOURCE_FILE" if ARGV.empty?
# File.read closes its handle; the old File.open(ARGV[0]).read leaked it.
ast = Parser.new(File.read(ARGV[0])).parse!
puts ast.asm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment