adamnew123456/compute-offsets.pl

## compute-offsets.pl
# Computes the offsets of values in a MIPS .data segment
use strict;
use warnings;

# Parser state shared by the helper subs and the main loop below.
my $in_data = 0;          # 1 while the lines being read belong to a .data segment
my $mem_offset;           # address of the next datum; set by each .data line
my $current_label = "";   # label waiting to be printed, or "" if there is none

# Collapses every C-style escape sequence in a string to a single placeholder
# character. Only the length of the result is meaningful: each escape stands
# for exactly one byte of assembled data, so it is replaced by a literal "X"
# rather than being interpreted. For example, it would replace:
#
#    Foo\'s bar\n
#
#  With the text:
#
#    FooXs barX
#
# Recognised escapes are \\ \' \" \? \a \b \f \n \r \t \v, octal escapes of
# one to three digits (0-7) and hex escapes of the form \xh or \xhh. A
# backslash followed by anything else is left untouched, so it still counts
# as two characters.
#
# Takes the raw text found between the quotes and returns the string with
# the placeholders substituted.
sub process_c_escapes {
    my $str = shift;

    # A single left-to-right pass: an escaped backslash consumes both of its
    # characters at once, so it can never be mistaken for the start of the
    # escape that follows it.
    $str =~ s/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3}|[\\'"?abfnrtv])/X/g;

    return $str;
}

# Rounds $mem_offset up to the nearest multiple of the given alignment; an
# offset that already is a multiple is left alone. For example,
# align_offset(4) moves the offset up so that it falls on a word boundary.
sub align_offset {
    my ($boundary) = @_;
    my $remainder = $mem_offset % $boundary;

    # Add whatever is missing to reach the next boundary
    $mem_offset += $boundary - $remainder unless $remainder == 0;
}

# Writes the pending label and the current offset (in hex) to standard
# output, then clears the label so that it is reported only once. Does
# nothing when no label is pending.
sub print_label {
    return if $current_label eq "";

    printf("%s: 0x%x\n", $current_label, $mem_offset);
    $current_label = "";
}

# Turns a numeric literal into an integer. The literal is either hexadecimal
# with a 0x prefix or a plain run of decimal digits; anything else is fatal.
sub to_int {
    my ($str) = @_;

    return hex($str) if $str =~ /^0x[0-9A-Fa-f]+$/;
    return $str + 0  if $str =~ /^[0-9]+$/;

    die "Invalid .data offset: $str";
}

# Main driver. Reads the assembly file named by the first command-line
# argument and, for every label that sits inside a .data segment, prints the
# label together with the address of the datum that follows it.
die "$0 <filename>" if $#ARGV < 0;
open(my $file, "<", $ARGV[0]) or die "Cannot open $ARGV[0]: $!";

while (my $line = <$file>) {
    chomp $line;

    if ($line =~ /^[.]data/) {
        # Pull the starting address from the directive's operand
        my @parts = split(/\s+/, $line);
        die "Missing address in .data directive: $line"
            unless defined $parts[1];

        $mem_offset = to_int($parts[1]);
        $in_data = 1;
        next;
    }

    if ($line =~ /^[.]text/) {
        $in_data = 0;
        next;
    }

    # Nothing outside of a data segment affects the offsets
    next if ($in_data == 0);

    # Record the data label so that the next data directive can print it out.
    # The label is stripped from the line so that a directive sharing the
    # line with it (for example "msg: .asciiz ...") is still processed below
    # instead of being skipped.
    if ($line =~ s/^([A-Za-z0-9_]+):\s*//) {
        $current_label = $1;
    }

    if ($line =~ /^\s*[.]ascii/) {
        print_label();

        # The text is everything between the first and the last double
        # quote; a line without a quoted string contributes no characters
        my ($string) = $line =~ /"(.*)"/;
        $string = "" unless defined $string;

        $mem_offset += length(process_c_escapes($string));

        # .asciiz takes one extra byte for the string terminator
        $mem_offset++ if ($line =~ /^\s*[.]asciiz/);
    } elsif ($line =~ /^\s*[.]byte/) {
        print_label();

        # The content of the bytes is irrelevant, we just need the space
        # they occupy: one byte per comma-separated value
        my @bytes = split(/,/, $line);
        $mem_offset += scalar @bytes;
    } elsif ($line =~ /^\s*[.]half(?:word)?\b/) {
        # Accepts both the .halfword and the .half spelling
        align_offset(2);
        print_label();

        # Each halfword occupies two bytes
        my @halves = split(/,/, $line);
        $mem_offset += 2 * scalar @halves;
    } elsif ($line =~ /^\s*[.]word/) {
        align_offset(4);
        print_label();

        # Each word occupies four bytes
        my @words = split(/,/, $line);
        $mem_offset += 4 * scalar @words;
    } elsif ($line =~ /^\s*[.]space/) {
        print_label();

        # The operand is the number of bytes to reserve
        my ($size) = $line =~ /[.]space\s+(\S+)/;
        die "Missing size in .space directive: $line" unless defined $size;

        $mem_offset += to_int($size);
    }
}

close($file);
	# Computes the offsets of values in a MIPS .data segment
	# Parser state: the in-.data flag, the running address (undefined until
	# a .data line supplies one) and the label waiting to be printed.
	my ($in_data, $mem_offset, $current_label) = (0, undef, "");

	# Collapses every C-style escape sequence in a string to a single
	# placeholder character. Only the length of the result is meaningful: each
	# escape stands for exactly one byte of assembled data, so it is replaced
	# by a literal "X" rather than being interpreted. For example, it would
	# replace:
	#
	#    Foo\'s bar\n
	#
	#  With the text:
	#
	#    FooXs barX
	#
	# Recognised escapes are \\ \' \" \? \a \b \f \n \r \t \v, octal escapes
	# of one to three digits (0-7) and hex escapes of the form \xh or \xhh. A
	# backslash followed by anything else is left untouched, so it still
	# counts as two characters.
	#
	# Takes the raw text found between the quotes and returns the string with
	# the placeholders substituted.
	sub process_c_escapes {
	    my $str = shift;

	    # A single left-to-right pass: an escaped backslash consumes both of
	    # its characters at once, so it can never be mistaken for the start
	    # of the escape that follows it.
	    $str =~ s/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3}|[\\'"?abfnrtv])/X/g;

	    return $str;
	}

	# Rounds $mem_offset up to the nearest multiple of the given alignment; an
	# offset that already is a multiple is left alone. For example,
	# align_offset(4) moves the offset up so that it falls on a word boundary.
	sub align_offset {
	    my ($boundary) = @_;
	    my $remainder = $mem_offset % $boundary;

	    # Add whatever is missing to reach the next boundary
	    $mem_offset += $boundary - $remainder unless $remainder == 0;
	}

	# Writes the pending label and the current offset (in hex) to standard
	# output, then clears the label so that it is reported only once. Does
	# nothing when no label is pending.
	sub print_label {
	    return if $current_label eq "";

	    printf("%s: 0x%x\n", $current_label, $mem_offset);
	    $current_label = "";
	}

	# Turns a numeric literal into an integer. The literal is either
	# hexadecimal with a 0x prefix or a plain run of decimal digits; anything
	# else is fatal.
	sub to_int {
	    my ($str) = @_;

	    return hex($str) if $str =~ /^0x[0-9A-Fa-f]+$/;
	    return $str + 0  if $str =~ /^[0-9]+$/;

	    die "Invalid .data offset: $str";
	}

	# Main driver. Reads the assembly file named by the first command-line
	# argument and, for every label that sits inside a .data segment, prints
	# the label together with the address of the datum that follows it.
	die "$0 <filename>" if $#ARGV < 0;
	open(my $file, "<", $ARGV[0]) or die "Cannot open $ARGV[0]: $!";

	while (my $line = <$file>) {
	    chomp $line;

	    if ($line =~ /^[.]data/) {
	        # Pull the starting address from the directive's operand
	        my @parts = split(/\s+/, $line);
	        die "Missing address in .data directive: $line"
	            unless defined $parts[1];

	        $mem_offset = to_int($parts[1]);
	        $in_data = 1;
	        next;
	    }

	    if ($line =~ /^[.]text/) {
	        $in_data = 0;
	        next;
	    }

	    # Nothing outside of a data segment affects the offsets
	    next if ($in_data == 0);

	    # Record the data label so that the next data directive can print it
	    # out. The label is stripped from the line so that a directive sharing
	    # the line with it (for example "msg: .asciiz ...") is still processed
	    # below instead of being skipped.
	    if ($line =~ s/^([A-Za-z0-9_]+):\s*//) {
	        $current_label = $1;
	    }

	    if ($line =~ /^\s*[.]ascii/) {
	        print_label();

	        # The text is everything between the first and the last double
	        # quote; a line without a quoted string contributes no characters
	        my ($string) = $line =~ /"(.*)"/;
	        $string = "" unless defined $string;

	        $mem_offset += length(process_c_escapes($string));

	        # .asciiz takes one extra byte for the string terminator
	        $mem_offset++ if ($line =~ /^\s*[.]asciiz/);
	    } elsif ($line =~ /^\s*[.]byte/) {
	        print_label();

	        # The content of the bytes is irrelevant, we just need the space
	        # they occupy: one byte per comma-separated value
	        my @bytes = split(/,/, $line);
	        $mem_offset += scalar @bytes;
	    } elsif ($line =~ /^\s*[.]half(?:word)?\b/) {
	        # Accepts both the .halfword and the .half spelling
	        align_offset(2);
	        print_label();

	        # Each halfword occupies two bytes
	        my @halves = split(/,/, $line);
	        $mem_offset += 2 * scalar @halves;
	    } elsif ($line =~ /^\s*[.]word/) {
	        align_offset(4);
	        print_label();

	        # Each word occupies four bytes
	        my @words = split(/,/, $line);
	        $mem_offset += 4 * scalar @words;
	    } elsif ($line =~ /^\s*[.]space/) {
	        print_label();

	        # The operand is the number of bytes to reserve
	        my ($size) = $line =~ /[.]space\s+(\S+)/;
	        die "Missing size in .space directive: $line" unless defined $size;

	        $mem_offset += to_int($size);
	    }
	}

	close($file);