Skip to content

Instantly share code, notes, and snippets.

@andrewpurcell
Created December 19, 2011 16:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewpurcell/1497920 to your computer and use it in GitHub Desktop.
Save andrewpurcell/1497920 to your computer and use it in GitHub Desktop.
EE126 MIPS Assembler
#!/usr/bin/env ruby
#
# mips.rb - a class for working with MIPS assembly code
# in Ruby. Provides an assembler that translates
# non-floating-point operations to machine code.
#
# Andrew Purcell, Tufts University
# EE126: Computer Engineering, Fall 2011
# Two-pass assembler for a small, integer-only subset of MIPS assembly.
#
# Typical use:
#   asm = Assembler.new("program.asm")
#   asm.first_pass   # tokenise, expand pseudo-instructions, collect labels
#   asm.second_pass  # print "Address 0x....: ........ => [...]" per word
#
# Operands are separated by whitespace and/or commas. A label is a leading
# "name:" token. The directives handled are .org, .byte and .end.
class Assembler
  # Instruction width / word size, in bytes.
  WORD_SIZE = 4

  # Opcode lookup table. :op is the opcode (I/J format) or funct field
  # (R format) already shifted into its final bit position; :format selects
  # the encoder; :branch marks PC-relative instructions.
  OP = {
    'add'  => { :format => :r, :op => 0x00000020 },
    'addi' => { :format => :i, :op => 0x20000000 },
    'and'  => { :format => :r, :op => 0x00000024 },
    'andi' => { :format => :i, :op => 0x30000000 },
    'beq'  => { :format => :i, :op => 0x10000000, :branch => true },
    'bne'  => { :format => :i, :op => 0x14000000, :branch => true },
    'j'    => { :format => :j, :op => 0x08000000 },
    'jal'  => { :format => :j, :op => 0x0c000000 }, # opcode 3 << 26
    'jr'   => { :format => :r, :op => 0x00000008 },
    'lb'   => { :format => :i, :op => 0x80000000 },
    'lui'  => { :format => :i, :op => 0x3c000000 },
    'lw'   => { :format => :i, :op => 0x8c000000 },
    'nor'  => { :format => :r, :op => 0x00000027 },
    'or'   => { :format => :r, :op => 0x00000025 },
    'ori'  => { :format => :i, :op => 0x34000000 },
    'sb'   => { :format => :i, :op => 0xa0000000 },
    'sll'  => { :format => :r, :op => 0x00000000 },
    'slt'  => { :format => :r, :op => 0x0000002a },
    'srl'  => { :format => :r, :op => 0x00000002 },
    'sub'  => { :format => :r, :op => 0x00000022 },
    'sw'   => { :format => :i, :op => 0xac000000 }
  }.freeze

  # R-format mnemonics whose third operand is a shift amount, not a register.
  Shift_Instr = ['sll', 'srl'].freeze

  # Pseudo-instruction templates. A Symbol :"n" stands for the n-th operand
  # (zero-based) of the pseudo-instruction being expanded.
  Pseudo = {
    'b'     => [['beq', '$zero', '$zero', :"0"]],
    'beqz'  => [['beq', :"0", '$zero', :"1"]],
    'blt'   => [['slt', '$at', :"0", :"1"], ['bne', '$at', '$zero', :"2"]],
    'ble'   => [['slt', '$at', :"1", :"0"], ['beq', '$at', '$zero', :"2"]],
    'bgt'   => [['slt', '$at', :"1", :"0"], ['bne', '$at', '$zero', :"2"]],
    # a >= b  <=>  !(a < b)
    'bge'   => [['slt', '$at', :"0", :"1"], ['beq', '$at', '$zero', :"2"]],
    'clear' => [['add', :"0", '$zero', '$zero']],
    'li'    => [['and', :"0", '$zero', '$zero'], ['ori', :"0", '$zero', :"1"]],
    'move'  => [['and', :"0", '$zero', '$zero'], ['or', :"0", '$zero', :"1"]]
  }.freeze

  # Global register assignments/lookups (name => register number).
  RT = {
    '$zero' => 0,  '$at' => 1,  '$v0' => 2,  '$v1' => 3,
    '$a0' => 4,    '$a1' => 5,  '$a2' => 6,  '$a3' => 7,
    '$t0' => 8,    '$t1' => 9,  '$t2' => 10, '$t3' => 11,
    '$t4' => 12,   '$t5' => 13, '$t6' => 14, '$t7' => 15,
    '$s0' => 16,   '$s1' => 17, '$s2' => 18, '$s3' => 19,
    '$s4' => 20,   '$s5' => 21, '$s6' => 22, '$s7' => 23,
    '$t8' => 24,   '$t9' => 25, '$k0' => 26, '$k1' => 27,
    '$gp' => 28,   '$sp' => 29, '$fp' => 30, '$ra' => 31
  }.freeze

  # Reads the assembly file into memory and sets up the (empty) symbol
  # table and first-pass output.
  #
  # file_name - path of the assembly source.
  #
  # Raises a SystemCallError subclass (e.g. Errno::ENOENT) when the file
  # cannot be read. File.readlines closes the handle itself.
  def initialize(file_name)
    # Whitespace-only lines are dropped, so later line indexes refer to
    # this filtered list rather than to the file.
    @lines = File.readlines(file_name).map(&:chomp).reject { |l| l =~ /\A\s*\z/ }
    @symbols = {}
    @first_pass = []
  end

  # First pass:
  # * parse each line into [mnemonic, operand, ...]
  # * record every label with the section/offset it refers to
  # * replace pseudo-instructions with their native expansion
  #
  # Lines that cannot be parsed are reported on stdout (the last message is
  # kept in @error) and are not queued for the second pass, so addresses
  # assigned here stay in step with the ones printed there.
  def first_pass
    mem_section = 0
    offset = 0
    @lines.each_with_index do |line, number|
      ops = parse_line(line)

      # record and remove a leading label
      if ops[0] && ops[0][/^\w+:/]
        @symbols[ops.shift.chop] = { :mem => mem_section, :offset => offset }
      end
      next if ops.empty? # label-only line: nothing to emit

      case ops[0]
      when /^\w+/ # instructions
        if Pseudo.key?(ops[0])
          expansion = get_pseudo(ops).map { |s| parse_line(s) }
          @first_pass.concat(expansion)
          # every emitted word moves the following labels
          offset += WORD_SIZE * expansion.length
        else
          @first_pass << ops
          offset += WORD_SIZE
        end
      when ".org"
        # start a new memory section
        mem_section = ops[1].to_i
        offset = 0
        @first_pass << ops
      when ".byte", ".end"
        # both occupy one word; .byte is translated in the second pass
        offset += WORD_SIZE
        @first_pass << ops
      else
        @error = "Invalid instruction, cannot parse at line #{number}: #{line}"
        puts @error
      end
    end
  end

  # Second pass: translate the first-pass output to machine code and print
  # one line per emitted word. Unknown mnemonics are reported; entries whose
  # encoder rejected them (returned nil) have already been reported by it.
  def second_pass
    pc = 0
    @first_pass.each do |entry|
      i = parse_line(entry)
      if directive?(i)
        case i[0]
        when ".byte"
          printf "Address 0x%04x: %08x => %s\n", pc, translate_byte(i), i.to_s
        when ".org"
          # the unconditional increment below lands pc on the new address
          pc = i[1].to_i - WORD_SIZE
        end
      elsif OP.key?(i[0])
        hex = case OP[i[0]][:format]
              when :r then r_fmt(i)
              when :i then i_fmt(i, pc)
              when :j then j_fmt(i)
              end
        unless hex.nil?
          printf "Address 0x%04x: %08x => %s\n", pc, hex, i.to_s
        end
      else
        puts "Invalid instruction: #{i}"
      end
      pc += WORD_SIZE
    end
  end

  # Encodes an R-format instruction.
  #
  # r - [mnemonic, rd, rs, rt], [shift mnemonic, rd, rt, shamt] or
  #     ['jr', rs].
  #
  # Returns the instruction word, or nil (after printing a message) when
  # the operand count does not fit.
  def r_fmt(r)
    code = OP[r[0]][:op].to_i
    # jr has a single operand, which goes in the rs field
    return code | (get_reg(r[1]) << 21) if r[0] == 'jr' && r.length == 2

    if r.length != 4
      puts "Instruction #{r} is not valid"
      return
    end

    code |= get_reg(r[1]) << 11 # rd
    if is_shift?(r[0])
      code |= get_reg(r[2]) << 16       # rt
      code |= (r[3].to_i & 0x1F) << 6   # shamt is a 5-bit field
    else
      code |= get_reg(r[2]) << 21       # rs: first source operand
      code |= get_reg(r[3]) << 16       # rt: second source operand
    end
    code
  end

  # Encodes an I-format instruction.
  #
  # i  - [mnemonic, operands...]; see i_operands for the accepted shapes of
  #      non-branch instructions. Branches are [mnemonic, rs, rt, target].
  # pc - address of this instruction, used for branch offsets.
  #
  # Returns the instruction word. The immediate is truncated to 16 bits so
  # that negative values cannot spill into the register/opcode fields.
  def i_fmt(i, pc)
    entry = OP[i[0]]
    code = entry[:op].to_i
    if entry[:branch]
      code |= (get_reg(i[1]) << 21) | (get_reg(i[2]) << 16)
      # NOTE(review): the offset is the BYTE distance from the instruction
      # after the branch. The MIPS32 ISA encodes that distance in words
      # (bytes / 4). Kept as written - confirm what the target CPU expects.
      return code | ((to_addr(i[3]) - pc - WORD_SIZE) & 0xFFFF)
    end

    rt, rs, imm = i_operands(i)
    code | (get_reg(rs) << 21) | (get_reg(rt) << 16) | (to_addr(imm) & 0xFFFF)
  end

  # Encodes a J-format instruction.
  #
  # j - [mnemonic, target], where target is a label or a non-zero number.
  #
  # Returns the instruction word; an unresolved target is reported and
  # encoded as 0.
  def j_fmt(j)
    op = OP[j[0]][:op]
    # NOTE(review): the target is the byte address, limited to the 26-bit
    # field. The MIPS32 ISA stores address >> 2 - confirm for the target CPU.
    if is_label?(j[1])
      op | (to_addr(j[1]) & 0x03FFFFFF)
    elsif j[1].to_i != 0
      op | (j[1].to_i & 0x03FFFFFF)
    else
      puts "Error: label #{j[1]} not defined."
      op
    end
  end

  # Expands a parsed pseudo-instruction.
  #
  # instr - [pseudo mnemonic, operands...]
  #
  # Returns an array of native instruction strings. The templates in Pseudo
  # are only read, never modified, so a pseudo-instruction can be expanded
  # any number of times.
  def get_pseudo(instr)
    args = instr[1..-1]
    Pseudo[instr[0]].map do |template|
      filled = template.map do |token|
        # a placeholder with no matching operand is left as it is
        token.is_a?(Symbol) ? (args[token.to_s.to_i] || token) : token
      end
      join_instr(filled)
    end
  end

  # Joins [mnemonic, operands...] back into "mnemonic op1, op2, ...".
  def join_instr(arr)
    "#{arr[0]} #{arr[1..-1].join(', ')}"
  end

  # Splits a source line into tokens on whitespace and commas. An Array is
  # taken to be already parsed and is returned unchanged.
  def parse_line(line)
    return line if line.is_a?(Array)
    line.split.map { |t| t.split(',') }.flatten
  end

  # Packs the operands of a .byte directive into one word, first operand in
  # the most significant byte.
  #
  # b - ['.byte', value, ...]; values are read with to_dec.
  def translate_byte(b)
    word = 0
    b[1..-1].each_with_index do |n, idx|
      word |= to_dec(n) << (24 - 8 * idx)
    end
    word
  end

  # Converts a numeric literal to an Integer: a trailing h/H means
  # hexadecimal, a trailing b/B means binary, anything else is decimal.
  def to_dec(num)
    text = num.to_s
    return text.to_i(16) if text =~ /[hH]$/
    return text.to_i(2) if text =~ /[bB]$/
    text.to_i
  end

  # Prints the source lines kept by initialize.
  def print_lines
    @lines.each { |l| puts l.to_s }
  end

  # Prints the token lists produced by first_pass.
  def print_first_pass
    @first_pass.each { |l| puts l.to_s }
  end

  # True when instr is a shift mnemonic (see Shift_Instr).
  def is_shift?(instr)
    Shift_Instr.include?(instr)
  end

  # Register number for a register name; unknown names map to 0.
  def get_reg(reg)
    RT.fetch(reg, 0)
  end

  # Prints every label with its absolute address and its section + offset.
  def print_symbol_table
    puts "****************"
    puts " SYMBOL TABLE"
    @symbols.each do |k, v|
      puts "\t* #{k} => 0x#{(v[:mem] + v[:offset]).to_s(16)}"
      puts "\t\t=#{v[:mem]} + #{v[:offset]}"
    end
    puts "****************"
  end

  # True when l is a label recorded by first_pass.
  def is_label?(l)
    @symbols.key?(l)
  end

  # True when the first token of a parsed line starts with a dot.
  def directive?(line)
    line[0].to_s.start_with?(".")
  end

  # Resolves an operand to a number: a known label gives its address
  # (section + offset); anything else is read as a decimal integer, which
  # is 0 for nil or non-numeric text.
  def to_addr(label)
    if is_label?(label)
      sym = @symbols[label]
      return sym[:mem] + sym[:offset]
    end
    label.to_i
  end

  private

  # Splits the operands of a non-branch I-format instruction into
  # [rt, rs, immediate]. Accepted shapes:
  #   op rt, rs, imm       (four tokens)
  #   op rt, imm($rs)      (memory operand)
  #   op rt, imm           (e.g. lui; rs is left nil and encodes as 0)
  #   op rt, rs            (immediate is left nil and encodes as 0)
  def i_operands(i)
    return [i[1], i[2], i[3]] unless i.length == 3

    memory = /\A(.*)\((\$\w+)\)\z/.match(i[2])
    return [i[1], memory[2], memory[1]] if memory
    return [i[1], nil, i[2]] unless RT.key?(i[2])
    [i[1], i[2], nil]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment