# Calculate .data label addresses for simple MARS programs.
# (GitHub gist adamnew123456/6b4743bc1c9251637d6b, last active March 6, 2016.)
# Computes the offsets of values in a MIPS .data segment
use strict;
use warnings;

# True (1) while the lines being read belong to a .data segment
my $in_data = 0;
# Address of the next byte to be allocated; set when a .data directive is seen
my $mem_offset;
# Label waiting to be printed by the next data directive ("" when none)
my $current_label = "";
# Collapses C escape sequences like \n into a single placeholder character.
# This is only meant to preserve the length of the string, so every escape
# becomes the letter X rather than the character it stands for. For example,
# it would replace:
#
#   Foo\'s bar\n
#
# With the text:
#
#   FooXs barX
#
# Takes the raw text between the quotes and returns the collapsed copy.
# Recognised escapes are \\ \' \" \? \a \b \f \n \r \t \v, octal escapes of
# one to three digits (\0, \12, \101) and hex escapes of the form \xH or \xHH.
# Anything else following a backslash is left untouched.
sub process_c_escapes {
    my $str = shift;

    # Everything is handled in one left-to-right pass. This way an escaped
    # backslash is consumed together with its partner and can never be
    # mistaken for the start of the following escape, and the output of one
    # replacement is never rescanned by another.
    $str =~ s{
        \\                          # escape introducer
        (?:
            [0-7]{1,3}              # octal escape - digits 0-7 only
          | x[0-9A-Fa-f]{1,2}       # hex escape - requires the leading x
          | [\\'"?abfnrtv]          # simple single-character escapes
        )
    }{X}gx;

    return $str;
}
# Rounds the global $mem_offset up to the nearest multiple of the argument.
# An offset that already is a multiple is left alone. For example,
# align_offset(4) moves the address up so that it falls on a word boundary.
sub align_offset {
    my $align = shift;
    my $remainder = $mem_offset % $align;

    # Already on a boundary - nothing to skip
    return if $remainder == 0;

    # Advance past the padding needed to reach the next boundary
    $mem_offset += $align - $remainder;
}
# Writes the pending label, followed by the current address in hex, to
# standard output and then clears the pending label so that it is reported
# only once. Does nothing when no label is pending.
sub print_label {
    return if $current_label eq "";

    printf("%s: 0x%x\n", $current_label, $mem_offset);
    $current_label = "";
}
# Converts a hexadecimal (0x-prefixed) or decimal string to an integer.
#
# Takes the numeric text and returns its value as a number. Dies when the
# argument is undefined or is not made up purely of hex digits after a 0x
# prefix, or purely of decimal digits. The error text is deliberately generic
# because this routine is used for segment addresses as well as for sizes.
sub to_int {
    my $str = shift;

    # Report a missing operand explicitly instead of matching against undef
    die "Missing numeric value" unless defined $str;

    # The prefix is matched case-insensitively; hex() accepts either case
    return hex($str) if $str =~ /^0x[0-9A-Fa-f]+$/i;
    return $str + 0  if $str =~ /^[0-9]+$/;

    die "Invalid numeric value: $str";
}
# Main program: reads the assembly file named on the command line and prints
# "label: 0xADDRESS" for every label that is followed by a data directive
# inside a .data segment.
die "$0 <filename>" if $#ARGV < 0;
open(my $file, "<", $ARGV[0]) or die "Cannot open $ARGV[0]: $!";

# Address used when .data is given without an explicit address. This is
# assumed to be the start of the data segment in MARS's default memory
# configuration - adjust if a different configuration is in use.
my $default_data_base = 0x10010000;

# Size in bytes of one element of each fixed-width directive. "halfword" is
# kept as an alias because earlier versions of this script looked for it.
my %width_of = (
    byte     => 1,
    half     => 2,
    halfword => 2,
    word     => 4,
);

LINE:
while (my $line = <$file>) {
    chomp $line;

    if ($line =~ /^\s*[.]data\b(?:\s+([^\s#]+))?/) {
        # Pull the offset from the data address, if one was given
        my $base = $1;
        $mem_offset = defined $base ? to_int($base) : $default_data_base;
        $in_data = 1;
        next LINE;
    }
    if ($line =~ /^\s*[.]text\b/) {
        $in_data = 0;
        next LINE;
    }
    next LINE if !$in_data;

    # Record the data label so that the next data directive can print it out.
    # The label is stripped from the line rather than ending its processing,
    # so that a directive written after it on the same line is still seen.
    if ($line =~ s/^\s*([A-Za-z0-9_]+):\s*//) {
        $current_label = $1;
    }

    # Only look for the directive at the start of the statement, so that a
    # directive name mentioned in a trailing comment is not acted upon
    next LINE
        unless $line =~ /^\s*[.](asciiz|ascii|byte|halfword|half|word|space)\b\s*(.*)$/;
    my ($directive, $operands) = ($1, $2);

    if ($directive eq 'ascii' || $directive eq 'asciiz') {
        # The string runs from the first double quote to the last one
        my ($literal) = $operands =~ /"(.*)"/;
        die "Malformed string literal: $line" unless defined $literal;

        print_label();
        $mem_offset += length(process_c_escapes($literal));
        # .asciiz appends a terminating NUL byte
        $mem_offset++ if $directive eq 'asciiz';
    } elsif (exists $width_of{$directive}) {
        my $width = $width_of{$directive};

        # Multi-byte values are aligned to their own size before the label is
        # printed, so that the label reports the aligned address
        align_offset($width) if $width > 1;
        print_label();

        # The content of the values is irrelevant, we just need the space
        # they occupy: one element per comma-separated field.
        # NOTE: commas inside a trailing comment are still counted.
        my @values = split(/,/, $operands);
        $mem_offset += $width * scalar @values;
    } else {
        # .space N reserves N bytes
        my ($size) = split(/\s+/, $operands);
        die "Missing size for .space" unless defined $size;

        print_label();
        $mem_offset += to_int($size);
    }
}
close($file);
# (GitHub page footer: sign-up / sign-in prompt for commenting on the gist.)