Skip to content

Instantly share code, notes, and snippets.

@kemenaran
Created November 24, 2020 17:28
Show Gist options
  • Save kemenaran/dc9232384e9eaf39196854757a6a2b36 to your computer and use it in GitHub Desktop.
Save kemenaran/dc9232384e9eaf39196854757a6a2b36 to your computer and use it in GitHub Desktop.
Adding Sorbet type annotations to a standalone Ruby script
#!/usr/bin/env ruby
#
# Given a source file, scan the unresolved data raw addresses,
# and emit an SYM file with additional data labels for those addresses.
#
# This SYM-file can then be fed into mgbdis to generate a new disassembly
# that includes the proper data blocks.
#
# For best results:
# - Document symbols in the original source file the best you can.
# - Label the jump tables.
# - Label internal call and jp addreses
# - Maybe remove duplicated labels for the same location
require 'sorbet-runtime'
def usage
puts "Usage:"
puts " tools/fix-data-sym.rb <asm-file> <bank-number>"
end
class LocalAddress < T::Struct
extend T::Sig
const :bank, Integer
const :offset, Integer
sig {params(bank: Integer, offset: Integer).returns(LocalAddress)}
def self.new(bank, offset)
super.new(bank: bank, offset: offset)
end
sig {params(str: String).returns(LocalAddress)}
def self.from_string(str)
bank, offset = str.split(':')
raise "Invalid address format" if bank.nil? || offset.nil?
self.new(bank.to_i(16), offset.to_i(16))
end
sig {returns(String)}
def to_s
("%02x:%02x" % [bank, offset]).upcase
end
sig {returns(Integer)}
def to_global
return [bank - 1, 0].max * 0x4000 + offset
end
sig {returns(String)}
def inspect
"#<LocalAddress #{to_s}>"
end
sig {params(bank_number: Integer).returns(T::Boolean)}
def in_bank?(bank_number)
return bank == bank_number && (0x4000...0x8000).include?(offset)
end
end
module Mgbdis
class Symbol < T::Struct
extend T::Sig
prop :address, LocalAddress
prop :label, String
prop :length, T.nilable(Integer)
sig {params(address: LocalAddress, label: String).returns(Symbol)}
def self.new(address, label)
super.new(address: address, label: label)
end
sig {params(str: String).returns(Symbol)}
def self.from_string(str)
address, label = str.split(' ')
raise "Invalid symbol format" if address.nil? || label.nil?
self.new(LocalAddress.from_string(address), label)
end
sig {params(address: LocalAddress).returns(Symbol)}
def self.from_address(address)
label = "Data_" + ("%03x_%04x" % [address.bank, address.offset]).upcase
self.new(address, label)
end
sig {returns(T::Boolean)}
def data_label?
label.downcase.start_with?('data_')
end
end
end
asm_file, bank_number, *the_rest = ARGV
if asm_file.nil? || bank_number.nil?
usage
exit -1
end
bank_number = bank_number.to_i(16)
sym_file = 'game.sym'
# Read the existing symbols from the SYM files
all_symbols = File.readlines(sym_file)
.map(&:strip)
.reject { |l| l.start_with?(';') || l.empty? }
.map { |l| Mgbdis::Symbol.from_string(l) }
# Augment the symbols with the raw addresses read from the source code
missing_symbols = File.readlines(asm_file)
.map { |l| l.match(/ld (hl|de|bc), \$([4-7][0-9A-Z]{3})/) }
.compact
.map { |match| T.must(match[2]).to_i(16) }
.map { |offset| LocalAddress.new(bank_number, offset) }
.map { |address| Mgbdis::Symbol.from_address(address) }
all_symbols = (all_symbols + missing_symbols).uniq
# Extract the symbols for the requested bank
symbols = all_symbols.reject { |s| s.address.bank != bank_number }
# Sort the bank symbols by address
symbols.sort_by! { |s| s.address.offset }
# Data previously interpreted as code produced a lot of bogus 'jr_' labels.
# These labels end up in the debug symbols, but should be ignored.
#
# Remove symbols prefixed by 'jr_' who are preceeded by a 'data_' symbol.
jrs_to_reject = symbols.select.with_index do |sym, index|
if sym.label.start_with?('jr_') && index >= 1
symbols_before = T.must(symbols[0..(index - 1)])
nearest_non_jr_index = symbols_before.rindex { |s| !s.label.start_with?('jr_') }
if !nearest_non_jr_index.nil?
nearest_non_jr_sym = T.must(symbols[nearest_non_jr_index])
if nearest_non_jr_sym.data_label?
next true
end
end
end
false
end
all_symbols -= jrs_to_reject
symbols -= jrs_to_reject
# For each data symbol, compute its actual length
# (i.e. the distance with the next symbol.)
symbols.each.with_index do |sym, index|
if sym.data_label?
next_sym = symbols[index + 1]
next_sym_address = next_sym&.address&.offset || 0x7fff
sym.length = next_sym_address - sym.address.offset
end
end
# Emit the mgbdis-compatible symbols, including the fixed data
all_symbols
.each do |s|
puts "#{s.address} #{s.label}"
if !s.length.nil?
puts "#{s.address} .data:%x" % [s.length]
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment