Skip to content

Instantly share code, notes, and snippets.

@adamnew123456
Last active March 6, 2016 15:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamnew123456/6b4743bc1c9251637d6b to your computer and use it in GitHub Desktop.
Save adamnew123456/6b4743bc1c9251637d6b to your computer and use it in GitHub Desktop.
Calculate .data label addresses for simple MARS programs
# Computes the offsets of values in a MIPS .data segment
#
# Strictures are enabled so that misspelled variable names and accidental
# use of undefined values are reported instead of silently producing wrong
# addresses.
use strict;
use warnings;

# 1 while the line being scanned lies inside a .data segment, 0 otherwise
my $in_data = 0;
# Address at which the next datum will be placed; stays undefined until a
# .data directive has been processed
my $mem_offset;
# Most recently seen label that has not been printed yet ("" when no label
# is pending)
my $current_label = "";
# Gets rid of C escapes like \n - note that this is only meant to preserve
# the length of the string, so it uses single-character placeholders rather
# than actually interpreting the escapes. For example, it would replace:
#
# Foo\'s bar\n
#
# With the text:
#
# FooXs barX
#
# Recognised escapes (each becomes one "X"):
#   - simple escapes:  \\ \' \" \? \a \b \f \n \r \t \v
#   - octal escapes:   a backslash followed by one to three octal digits
#                      (this includes the very common \0)
#   - hex escapes:     \x followed by one or two hex digits
# A backslash followed by anything else is left untouched.
#
# Takes the raw text between the quotes of a string literal and returns the
# rewritten copy; the argument itself is not modified.
sub process_c_escapes {
    my ($str) = @_;
    # A single left-to-right pass consumes each escape exactly once, so an
    # escaped backslash can never be mistaken for the start of the escape
    # that follows it (e.g. \\n is one backslash and a literal 'n').
    # The octal and hex alternatives are greedy, as in C.
    $str =~ s/\\(?:[\\'"?abfnrtv]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})/X/g;
    return $str;
}
# Rounds the global $mem_offset up to the next multiple of the given
# alignment. An offset that already sits on such a boundary is left alone.
# For instance, align_offset(4) moves 0x1001 up to 0x1004 but keeps 0x1004
# where it is.
#
# Argument: the alignment in bytes (callers in this file pass 2 or 4).
sub align_offset {
    my ($boundary) = @_;
    # How far past the previous boundary the offset currently is
    my $overshoot = $mem_offset % $boundary;
    $mem_offset += $boundary - $overshoot if $overshoot != 0;
}
# Emits the pending label (if any) together with the current value of
# $mem_offset in hexadecimal, then clears the pending label so that it is
# reported only once. Does nothing when no label is pending.
sub print_label {
    return if $current_label eq "";
    printf("%s: 0x%x\n", $current_label, $mem_offset);
    $current_label = "";
}
# Turns a numeric string into an integer. Two notations are accepted:
# hexadecimal with a lowercase "0x" prefix, and plain unsigned decimal.
# Anything else aborts the program with an "Invalid .data offset" message.
sub to_int {
    my ($text) = @_;
    return hex($text) if $text =~ /^0x[0-9A-Fa-f]+$/;
    return $text + 0  if $text =~ /^[0-9]+$/;
    die "Invalid .data offset: $text";
}
# Main script: scans the assembly file named on the command line and prints
# "label: 0xADDRESS" for every label that is followed by a recognised data
# directive inside a .data segment.
#
# Recognised directives: .data, .text, .ascii, .asciiz, .byte, .half
# (.halfword is accepted as well), .word and .space.
#
# Known limitation: the operands of .byte/.half/.word are counted by
# splitting the line on commas, so a trailing comment that contains commas
# inflates the count.
die "$0 <filename>" if $#ARGV < 0;
open(my $file, "<", $ARGV[0]) or die "Cannot open $ARGV[0]: $!";

# Address MARS assigns to the data segment when .data carries no operand
# (default memory configuration)
my $default_data_base = 0x10010000;

while (my $line = <$file>) {
    chomp $line;
    # Indentation carries no meaning, so directives and labels are
    # recognised whether or not they start in the first column
    $line =~ s/^\s+//;

    if ($line =~ /^[.]data\b/) {
        # Pull the offset from the data address, if one was given
        my (undef, $address) = split(/\s+/, $line);
        if (defined $address && $address !~ /^#/) {
            $mem_offset = to_int($address);
        } elsif (!defined $mem_offset) {
            # First .data without an explicit address
            $mem_offset = $default_data_base;
        }
        # Otherwise a later bare .data resumes where the segment left off
        $in_data = 1;
        next;
    }
    if ($line =~ /^[.]text\b/) {
        $in_data = 0;
        next;
    }
    # Nothing outside of a data segment contributes to the addresses
    next if ($in_data == 0);

    # Record the data label so that the next data directive can print it
    # out. The label is removed from the line because the directive may
    # follow it directly ("msg: .asciiz ...") instead of on the next line.
    if ($line =~ s/^([A-Za-z0-9_]+):\s*//) {
        $current_label = $1;
    }

    if ($line =~ /[.]ascii/) {
        print_label();
        # The string literal runs from the first to the last double quote
        my $string = ($line =~ /"(.*)"/) ? $1 : "";
        $mem_offset += length(process_c_escapes($string));
        # .asciiz appends a terminating NUL byte
        $mem_offset++ if ($line =~ /[.]asciiz/);
    } elsif ($line =~ /[.]byte/) {
        print_label();
        # The content of the bytes is irrelevant, we just need the space
        # they occupy
        my @bytes = split(/,/, $line);
        $mem_offset += scalar @bytes;
    } elsif ($line =~ /[.]half/) {
        # Align before printing so the label gets the aligned address
        align_offset(2);
        print_label();
        my @halves = split(/,/, $line);
        # Each halfword occupies two bytes
        $mem_offset += 2 * scalar @halves;
    } elsif ($line =~ /[.]word/) {
        align_offset(4);
        print_label();
        my @words = split(/,/, $line);
        # Each word occupies four bytes
        $mem_offset += 4 * scalar @words;
    } elsif ($line =~ /[.]space\s+(\S+)/) {
        print_label();
        # The operand is the number of bytes to reserve
        $mem_offset += to_int($1);
    }
}
close($file);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment