Created
December 19, 2011 16:45
-
-
Save andrewpurcell/1497920 to your computer and use it in GitHub Desktop.
EE126 MIPS Assembler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# mips.rb - a class for working with MIPS assembly code
#           in ruby. Provides an assembler to translate
#           non floating point operations to machine code.
#
# Andrew Purcell, Tufts University
# EE126: Computer Engineering, Fall 2011
# Two-pass assembler for a subset of MIPS integer instructions.
#
# Usage: build an instance from a source file, call #first_pass to
# collect labels and expand pseudo-instructions, then #second_pass to
# print one "Address 0x....: <word> => <tokens>" line per emitted word.
class Assembler
  # Instruction width / word size in bytes.
  WORD_SIZE = 4

  # Opcode lookup table. :op holds the opcode (or, for R-format, the
  # function code) already positioned inside the 32-bit word.
  OP = {
    'add'  => { :format => :r, :op => 0x00000020 },
    'addi' => { :format => :i, :op => 0x20000000 },
    'and'  => { :format => :r, :op => 0x00000024 },
    'andi' => { :format => :i, :op => 0x30000000 },
    'beq'  => { :format => :i, :op => 0x10000000, :branch => true },
    'bne'  => { :format => :i, :op => 0x14000000, :branch => true },
    'j'    => { :format => :j, :op => 0x08000000 },
    # jal is opcode 3 in bits 31..26 (the table previously held 0x03000000).
    'jal'  => { :format => :j, :op => 0x0c000000 },
    'jr'   => { :format => :r, :op => 0x00000008 },
    'lb'   => { :format => :i, :op => 0x80000000 },
    'lui'  => { :format => :i, :op => 0x3c000000 },
    'lw'   => { :format => :i, :op => 0x8c000000 },
    'nor'  => { :format => :r, :op => 0x00000027 },
    'or'   => { :format => :r, :op => 0x00000025 },
    'ori'  => { :format => :i, :op => 0x34000000 },
    'sb'   => { :format => :i, :op => 0xa0000000 },
    'sll'  => { :format => :r, :op => 0x00000000 },
    'slt'  => { :format => :r, :op => 0x0000002a },
    'srl'  => { :format => :r, :op => 0x00000002 },
    'sub'  => { :format => :r, :op => 0x00000022 },
    'sw'   => { :format => :i, :op => 0xac000000 }
  }.freeze

  # R-format instructions whose third operand is a shift amount.
  Shift_Instr = ['sll', 'srl'].freeze

  # Pseudo-instruction templates. A Symbol such as :"1" is a placeholder
  # for the operand at that zero-based position of the pseudo-instruction.
  Pseudo = {
    'b'     => [['beq', '$zero', '$zero', :"0"]],
    'beqz'  => [['beq', :"0", '$zero', :"1"]],
    'blt'   => [['slt', '$at', :"0", :"1"], ['bne', '$at', '$zero', :"2"]],
    'ble'   => [['slt', '$at', :"1", :"0"], ['beq', '$at', '$zero', :"2"]],
    'bgt'   => [['slt', '$at', :"1", :"0"], ['bne', '$at', '$zero', :"2"]],
    # a >= b  <=>  !(a < b); the template previously repeated :"0".
    'bge'   => [['slt', '$at', :"0", :"1"], ['beq', '$at', '$zero', :"2"]],
    'clear' => [['add', :"0", '$zero', '$zero']],
    'li'    => [['and', :"0", '$zero', '$zero'], ['ori', :"0", '$zero', :"1"]],
    'move'  => [['and', :"0", '$zero', '$zero'], ['or', :"0", '$zero', :"1"]]
  }.freeze

  # Global register assignments/lookups (name => register number).
  RT = {
    '$zero' => 0,  '$at' => 1,  '$v0' => 2,  '$v1' => 3,
    '$a0' => 4,    '$a1' => 5,  '$a2' => 6,  '$a3' => 7,
    '$t0' => 8,    '$t1' => 9,  '$t2' => 10, '$t3' => 11,
    '$t4' => 12,   '$t5' => 13, '$t6' => 14, '$t7' => 15,
    '$s0' => 16,   '$s1' => 17, '$s2' => 18, '$s3' => 19,
    '$s4' => 20,   '$s5' => 21, '$s6' => 22, '$s7' => 23,
    '$t8' => 24,   '$t9' => 25, '$k0' => 26, '$k1' => 27,
    '$gp' => 28,   '$sp' => 29, '$fp' => 30, '$ra' => 31
  }.freeze

  # Reads the assembly file into memory (blank lines dropped) and sets up
  # the empty symbol table and first-pass buffer.
  #
  # file_name - path of the assembly source.
  #
  # Raises the usual Errno::* error when the file cannot be opened.
  def initialize(file_name)
    @lines = []
    # The block form closes the file handle once reading is done.
    File.foreach(file_name) do |raw|
      @lines << raw.chomp unless raw =~ /\A\s*\z/
    end
    @symbols = {}
    @first_pass = []
  end

  # First pass:
  #  * parse each line into instruction/argument tokens
  #  * build the symbol table (label => memory section + byte offset)
  #  * replace pseudo-instructions with their decompositions
  #
  # Each call starts from empty tables, so calling it twice does not
  # duplicate output.
  def first_pass
    @symbols = {}
    @first_pass = []
    mem_section = 0
    offset = 0

    @lines.each_with_index do |line, index|
      ops = parse_line(line)

      # record and remove a leading label
      if ops.first.to_s =~ /^\w+:/
        label = ops.shift
        @symbols[label.chop] = { :mem => mem_section, :offset => offset }
      end
      # A label on its own line emits nothing and occupies no address.
      next if ops.empty?

      case ops.first
      when /^\w+/ # instructions (real or pseudo)
        if Pseudo.key?(ops.first)
          expansion = get_pseudo(ops).map { |text| parse_line(text) }
          @first_pass.concat(expansion)
          # every emitted instruction takes a word, not just the first
          offset += WORD_SIZE * expansion.length
        else
          @first_pass << ops
          offset += WORD_SIZE
        end
      when '.org'
        # start a new memory section
        mem_section = ops[1].to_i
        offset = 0
        @first_pass << ops
      when '.byte', '.end'
        # .byte is translated in the second pass; .end is passed over
        # there, but both advance the address by one word.
        offset += WORD_SIZE
        @first_pass << ops
      else
        @error = "Invalid instruction, cannot parse at line #{index}: #{line}"
        puts @error
        # Rejected lines are not queued: they would advance the second
        # pass's pc without advancing the offsets recorded here.
      end
    end
  end

  # Second pass: translate the first-pass instructions to machine code
  # and print one line per emitted word.
  def second_pass
    pc = 0
    @first_pass.each do |entry|
      i = parse_line(entry)
      mnemonic = i[0]

      if directive?(i)
        case mnemonic
        when '.byte'
          printf "Address 0x%04x: %08x => %s\n", pc, translate_byte(i), i.to_s
        when '.org'
          # compensate for the unconditional increment below
          pc = i[1].to_i - WORD_SIZE
        end
      elsif OP.key?(mnemonic)
        hex = case OP[mnemonic][:format]
              when :r then r_fmt(i)
              when :i then i_fmt(i, pc)
              when :j then j_fmt(i)
              end
        # r_fmt returns nil (after reporting) for malformed instructions
        printf "Address 0x%04x: %08x => %s\n", pc, hex, i.to_s if hex
      else
        puts "Invalid instruction: #{i}"
      end
      pc += WORD_SIZE
    end
  end

  # Encodes an R-format instruction given as [mnemonic, op1, op2, op3],
  # or ['jr', register].
  #
  # Returns the word as an Integer, or nil (after printing a message) when
  # the operand count is wrong.
  def r_fmt(r)
    code = OP[r[0]][:op].to_i
    # jr takes a single register, placed in bits 25..21
    return code | (get_reg(r[1]) << 21) if r[0] == 'jr' && r.length == 2

    if r.length != 4
      puts "Instruction #{r} is not valid"
      return
    end

    # NOTE(review): for non-shift instructions the second source operand
    # goes to bits 25..21 and the first to bits 20..16, which is the
    # reverse of the MIPS32 reference (rs = 25..21, rt = 20..16). Left
    # unchanged in case the target datapath expects it -- confirm.
    if is_shift?(r[0])
      code |= (r[3].to_i << 6)
    else
      code |= (get_reg(r[3]) << 21)
    end
    code | (get_reg(r[1]) << 11) | (get_reg(r[2]) << 16)
  end

  # Encodes an I-format instruction [mnemonic, reg, reg, immediate|label].
  #
  # i  - token array.
  # pc - address of this instruction, used for branch offsets.
  #
  # Branches store (target - pc - WORD_SIZE); everything else stores the
  # immediate or label address. The value is masked to the 16-bit field so
  # negative numbers cannot overwrite the opcode/register bits.
  def i_fmt(i, pc)
    code = OP[i[0]][:op].to_i
    # NOTE(review): the first operand is placed in bits 25..21 and the
    # second in bits 20..16 for every I-format instruction -- confirm this
    # matches the target encoding for non-branch instructions.
    code |= (get_reg(i[1]) << 21) | (get_reg(i[2]) << 16)

    immediate = to_addr(i[3])
    # NOTE(review): branch offsets are stored in bytes (not shifted right
    # by 2) -- presumably what the target expects; confirm.
    immediate -= pc + WORD_SIZE if OP[i[0]][:branch]
    code | (immediate & 0xFFFF)
  end

  # Encodes a J-format instruction [mnemonic, label|address]. Prints an
  # error and returns the bare opcode when the target cannot be resolved.
  def j_fmt(j)
    opcode = OP[j[0]][:op]
    target = j[1]
    return opcode | to_addr(target) if @symbols[target]
    return opcode | target.to_i if target.to_i != 0

    puts "Error: label #{target} not defined."
    opcode
  end

  # Takes the token array of a parsed pseudo-instruction and returns an
  # array of native instruction strings. The Pseudo table is only read,
  # never modified, so the same pseudo-instruction can be expanded
  # repeatedly.
  def get_pseudo(instr)
    operands = instr[1..-1]
    Pseudo[instr[0]].map do |template|
      filled = template.map do |token|
        next token unless token.is_a?(Symbol)
        position = token.to_s.to_i
        # placeholders without a matching operand are left as they are
        position < operands.length ? operands[position] : token
      end
      join_instr(filled)
    end
  end

  # Joins [mnemonic, arg, ...] back into "mnemonic arg, arg".
  def join_instr(arr)
    "#{arr[0]} #{arr[1..-1].join(', ')}"
  end

  # Splits a source line into tokens on whitespace and commas. Arrays
  # (already parsed lines) are returned untouched.
  def parse_line(line)
    return line if line.class.eql? Array
    line.split.flat_map { |token| token.split(',') }
  end

  # Packs the operands of a parsed .byte directive into one word, first
  # operand in the most significant byte. Each value is reduced to 8 bits;
  # operands beyond the first WORD_SIZE do not fit and are ignored.
  def translate_byte(b)
    b[1..-1].first(WORD_SIZE).each_with_index.inject(0) do |word, (n, idx)|
      word | ((to_dec(n) & 0xFF) << (24 - 8 * idx))
    end
  end

  # Converts a numeric literal to an Integer: a trailing h/H marks
  # hexadecimal, a trailing b/B marks binary, anything else is decimal.
  def to_dec(num)
    text = num.to_s
    return text.to_i(16) if text =~ /[hH]$/
    return text.to_i(2) if text =~ /[bB]$/
    text.to_i
  end

  # Prints the source lines read by the initializer.
  def print_lines
    @lines.each { |l| puts l.to_s }
  end

  # Prints the token arrays produced by #first_pass.
  def print_first_pass
    @first_pass.each { |l| puts l.to_s }
  end

  # True when instr is a shift mnemonic (see Shift_Instr).
  def is_shift?(instr)
    Shift_Instr.include?(instr)
  end

  # Register number for a register name; unknown names map to 0.
  def get_reg(reg)
    RT[reg] || 0
  end

  # Prints every label with its absolute address and its section/offset.
  def print_symbol_table
    puts "****************"
    puts " SYMBOL TABLE"
    @symbols.each do |name, where|
      puts "\t* #{name} => 0x#{(where[:mem] + where[:offset]).to_s(16)}"
      puts "\t\t=#{where[:mem]} + #{where[:offset]}"
    end
    puts "****************"
  end

  # True when l is a label recorded by #first_pass.
  def is_label?(l)
    !@symbols[l].nil?
  end

  # True when the parsed line starts with a dot-directive.
  def directive?(line)
    line[0].to_s.start_with?('.')
  end

  # Resolves an operand to a number: a non-zero decimal literal is used
  # directly, a known label yields section + offset, anything else is 0.
  def to_addr(label)
    literal = label.to_i
    return literal if literal != 0
    return @symbols[label][:mem] + @symbols[label][:offset] if is_label?(label)
    0
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment