Skip to content

Instantly share code, notes, and snippets.

@PikalaxALT
Last active January 8, 2022 17:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PikalaxALT/2b9b82d6680e574d8b6f75660e9bb957 to your computer and use it in GitHub Desktop.
Save PikalaxALT/2b9b82d6680e574d8b6f75660e9bb957 to your computer and use it in GitHub Desktop.
Rudimentary GBZ80 disassembler (MBC3 only for the time being)
import argparse
from functools import total_ordering
from z80table import z80table, extdtable
import re
def fread(file, n):
    """Read *n* bytes from binary *file* and decode them as a little-endian int.

    A short read (fewer than *n* bytes available) is not an error: whatever
    bytes were returned are decoded, so an exhausted file yields 0.
    """
    return int.from_bytes(file.read(n), 'little')
def gbaddr2offset(bank, addr):
    """Convert a (bank, address) pair into a flat ROM file offset.

    Banks are 0x4000 bytes each; only the low 14 bits of *addr* are kept, so
    both 0x0000-0x3fff and 0x4000-0x7fff addresses map into the given bank.
    """
    return (bank << 14) | (addr & 0x3fff)
def offset2gbaddr(offset):
    """Convert a flat ROM file offset into a ``(bank, address)`` pair.

    Bank 0 keeps addresses in 0x0000-0x3fff; every other bank is reported in
    the 0x4000-0x7fff window.
    """
    bank, addr = divmod(offset, 0x4000)
    if bank:
        addr |= 0x4000
    return bank, addr
# Names for the $ff00-page locations, keyed by the low byte of the address
# (the one-byte operand of an `ldh` instruction).
registers = {
    0x00: 'rJOYP',
    0x01: 'rSB',
    0x02: 'rSC',
    0x04: 'rDIV',
    0x05: 'rTIMA',
    0x06: 'rTMA',
    0x07: 'rTAC',
    0x0f: 'rIF',
    # $10-$23: rNR10..rNR14, rNR20..rNR24, rNR30..rNR34, rNR40..rNR44
    **{
        0x10 + 5 * group + index: f'rNR{group + 1}{index}'
        for group in range(4)
        for index in range(5)
    },
    0x24: 'rNR50',
    0x25: 'rNR51',
    0x26: 'rNR52',
    # $30-$3f: rWave_0..rWave_f (lower-case hex suffix)
    **{0x30 + index: f'rWave_{index:x}' for index in range(16)},
    0x40: 'rLCDC',
    0x41: 'rSTAT',
    0x42: 'rSCY',
    0x43: 'rSCX',
    0x44: 'rLY',
    0x45: 'rLYC',
    0x46: 'rDMA',
    0x47: 'rBGP',
    0x48: 'rOBP0',
    0x49: 'rOBP1',
    0x4a: 'rWY',
    0x4b: 'rWX',
    0x4c: 'rLCDMODE',
    0x4d: 'rKEY1',
    0x4f: 'rVBK',
    0x50: 'rBLCK',
    # $51-$55: rHDMA1..rHDMA5
    **{0x51 + index: f'rHDMA{index + 1}' for index in range(5)},
    0x56: 'rRP',
    0x68: 'rBGPI',
    0x69: 'rBGPD',
    0x6a: 'rOBPI',
    0x6b: 'rOBPD',
    0x6c: 'rUNKNOWN1',
    0x70: 'rSVBK',
    # $72-$77: rUNKNOWN2..rUNKNOWN7
    **{0x72 + index: f'rUNKNOWN{index + 2}' for index in range(6)},
    0xff: 'rIE',
}
@total_ordering
class GBSection:
    """Classify a 16-bit address by the memory region it falls in.

    ``ident`` is the number of region boundaries at or below the address, so
    it equals one of the class constants for addresses below $E000 and is 6
    (no named constant) for anything at or above $E000.  Instances compare,
    order and hash by ``ident``.
    """

    ROM0 = 0
    ROMX = 1
    VRAM = 2
    SRAM = 3
    WRAM0 = 4
    WRAMX = 5
    # Upper bound (exclusive) of each named region, in ident order.
    __thresholds__ = (0x4000, 0x8000, 0xa000, 0xc000, 0xd000, 0xe000)

    def __init__(self, addr):
        self.ident = sum(x <= addr for x in self.__thresholds__)

    @property
    def start(self):
        """First address of the region."""
        if self.ident == self.ROM0:
            return 0
        return self.__thresholds__[self.ident - 1]

    @property
    def end(self):
        """One past the last address of the region."""
        # The region above the last threshold has no tabulated upper bound;
        # it runs to the top of the 16-bit address space.
        if self.ident >= len(self.__thresholds__):
            return 0x10000
        return self.__thresholds__[self.ident]

    def __eq__(self, other):
        if not isinstance(other, GBSection):
            return NotImplemented
        return self.ident == other.ident

    def __lt__(self, other):
        if not isinstance(other, GBSection):
            return NotImplemented
        return self.ident < other.ident

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable.
        return hash(self.ident)
def read_sym(filename):
    """Yield ``(bank, addr, name)`` for every symbol line in *filename*.

    Each line is expected to look like ``BB:AAAA Name`` with hexadecimal bank
    and address; anything after a ``;`` is ignored.  Lines that do not have
    exactly that shape (blank lines, comments, malformed numbers) are skipped.
    """
    with open(filename) as fp:
        for raw_line in fp:
            entry = raw_line.partition(';')[0]
            # Only the parsing can legitimately fail; keep the yield outside
            # the try so unrelated ValueErrors are never swallowed.
            try:
                pointer, name = entry.split()
                bank, addr = (int(part, 16) for part in pointer.split(':'))
            except ValueError:
                continue
            yield bank, addr, name
class Symfile(dict):
    """Symbol table loaded from a sym file, mapping ``(bank, addr)`` to a name.

    Names containing a ``.`` are not loaded.  If several entries share the
    same ``(bank, addr)``, the last one in the file wins.
    """

    def __init__(self, filename):
        super().__init__()
        for bank, addr, name in read_sym(filename):
            # Dotted names are skipped rather than shortened to their
            # ".local" part.
            if '.' in name:
                continue
            self[(bank, addr)] = name
class MBC:
    """Memory bank controller description (only MBC3 is implemented).

    The properties give the addresses this controller reserves; ``reserved``
    bundles them into a ``{label: address}`` dict with labels prefixed by the
    controller name.
    """

    MBC1 = 1
    MBC2 = 2
    MBC3 = 3
    MBC5 = 5
    MBC6 = 6
    MBC7 = 7
    MMM01 = 11
    HuC1 = 21
    HuC3 = 23
    OTHER = 99

    def __init__(self, mbc):
        """Create the description for controller kind *mbc* (a class constant).

        Raises:
            NotImplementedError: if *mbc* is anything other than ``MBC.MBC3``.
        """
        # This only supports MBC3 for now.
        if mbc != self.MBC3:
            # NotImplemented is a comparison sentinel, not an exception;
            # calling it would raise a confusing TypeError instead.
            raise NotImplementedError('Only MBC3 is supported')
        self.type = mbc

    @property
    def sram_enable(self):
        return 0x0000

    @property
    def rom_bank(self):
        return 0x2000

    @property
    def sram_bank(self):
        return 0x4000

    @property
    def latch_clock(self):
        return 0x6000

    @property
    def rtc(self):
        return 0xa000

    @property
    def name(self):
        return 'MBC3'

    @property
    def reserved(self):
        """Mapping of label name to reserved address."""
        return {
            self.name + 'SRamEnable': self.sram_enable,
            self.name + 'RomBank': self.rom_bank,
            self.name + 'SRamBank': self.sram_bank,
            self.name + 'LatchClock': self.latch_clock,
            self.name + 'RTC': self.rtc
        }
class Z80Disassembler:
    """Context manager that decodes instructions from a ROM file.

    Entering opens the ROM, checks the cartridge type in the header and seeks
    to ``start``.  Iterating yields ``(pc, instr_name, argsize, arg)`` tuples,
    where ``pc`` is the file offset of the instruction, until ``end`` (a file
    offset, exclusive) or the end of the file is reached.
    """

    # File offset of the cartridge-type byte in the ROM header.
    _CART_TYPE_OFFSET = 0x147
    # Header cartridge-type values that denote an MBC3 mapper
    # ($0F-$13: the MBC3 variants with/without timer, RAM and battery).
    _MBC3_CART_TYPES = range(0x0f, 0x14)

    def __init__(self, fname, start, end):
        self.fname = fname
        self.file = None
        self.start = start
        self.bank = start >> 14  # ROM bank containing the start offset
        self.end = end
        self.mbc = None

    def __enter__(self):
        """Open the ROM, validate its mapper and seek to ``start``.

        Raises:
            NotImplementedError: if the header does not declare an MBC3.
        """
        self.file = open(self.fname, 'rb')
        try:
            self.file.seek(self._CART_TYPE_OFFSET)
            cart_type = self.read()
            # The header byte is a cartridge-type code, not an MBC number,
            # so it has to be translated before being handed to MBC().
            if cart_type not in self._MBC3_CART_TYPES:
                raise NotImplementedError(
                    f'Only MBC3 is supported (cartridge type ${cart_type:02x})'
                )
            self.mbc = MBC(MBC.MBC3)
            self.file.seek(self.start)
        except BaseException:
            # Do not leak the handle when setup fails: __exit__ is never
            # called if __enter__ raises.
            self.file.close()
            self.file = None
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()
        self.file = None

    def read(self, n=1):
        """Read *n* bytes at the current position as a little-endian int."""
        return int.from_bytes(self.file.read(n), 'little')

    def __iter__(self):
        pc = self.file.tell()
        while pc < self.end:
            opcode = self.file.read(1)
            if not opcode:
                # End of file before ``end``: an empty read would otherwise
                # decode as opcode 0 forever without advancing.
                break
            instr_name, argsize = z80table[opcode[0]]
            if instr_name == 'extd':
                # Prefix opcode: the next byte selects the real instruction.
                instr_name = extdtable[self.read()]
                argsize = 0
            arg = self.read(argsize) if argsize else 0
            yield pc, instr_name, argsize, arg
            pc = self.file.tell()
def arg_is_addr(instr_name, arg):
    """Return True if the operand *arg* of *instr_name* should get a label.

    Jumps and calls always qualify.  Loads qualify unless they are the
    ``sp+`` form with an operand below 0x150.  Every other instruction takes
    a plain immediate.
    """
    if instr_name.startswith(('jr', 'jp', 'call')):
        return True
    if instr_name.startswith('ld'):
        if '[' in instr_name or arg >= 0x150:
            return True
        # NOTE(review): this makes every non-"sp+" load return True, including
        # small immediates such as `ld b, $5`; the 0x150 check above suggests
        # that may not be what was intended - confirm before tightening.
        return 'sp+' not in instr_name
    return False
def get_key(bank, addr):
    """Return the ``(bank, addr)`` symbol-table key for *addr*.

    *bank* is the bank of the code doing the access.  It is kept only for
    addresses in 0x4000-0x7fff, where bank 0 is replaced by 1.  Addresses in
    0xd000-0xdfff are filed under bank 1 and everything else under bank 0.
    """
    if 0x4000 <= addr < 0x8000:
        # Code in bank 0 referring to this window: the real bank is unknown,
        # so assume 1.
        bank = bank or 1
    elif 0xd000 <= addr < 0xe000:
        bank = 1  # the real bank is unknown here as well
    else:
        bank = 0
    return bank, addr
def _jr_target(pc, disp):
    """Return the GB address that a ``jr`` at ROM offset *pc* branches to.

    *disp* is the raw operand byte, interpreted as a signed 8-bit
    displacement from the instruction following the two-byte ``jr``.
    """
    if disp & 0x80:
        disp -= 0x100
    # Work from the instruction's GB address, not its file offset: the two
    # only coincide in banks 0 and 1.
    return offset2gbaddr(pc)[1] + 2 + disp


def main():
    """Disassemble a range of a ROM and print it as assembly source.

    Command line: ``rom start end [-s SYMFILE]`` where *start* and *end* are
    ROM file offsets (any base accepted by ``int(s, 0)``).  The range is
    decoded twice: the first pass invents a label for every operand that
    looks like an address, the second prints the instructions with operands
    replaced by labels and emits label definitions at their locations.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('rom')
    parser.add_argument('start', type=lambda s: int(s, 0))
    parser.add_argument('end', type=lambda s: int(s, 0))
    parser.add_argument('-s', '--sym', type=Symfile, default={})
    args = parser.parse_args()
    symbols = args.sym

    # Pass 1: collect labels.  setdefault() keeps names from the sym file.
    with Z80Disassembler(args.rom, args.start, args.end) as dismbler:
        for name, sym in dismbler.mbc.reserved.items():
            symbols.setdefault((0, sym), name)
        for pc, instr_name, argsize, arg in dismbler:
            if argsize == 0 or not arg_is_addr(instr_name, arg):
                continue
            if 'jr' in instr_name:
                arg = _jr_target(pc, arg)
                fmtstr = '.asm_{:04x}'
            elif 'ld' in instr_name:
                fmtstr = 'Unknown_{:04x}'
            else:
                fmtstr = 'Function{:04x}'
            if 'ldh' in instr_name:
                # Look the register up by its low byte before widening the
                # operand to the full $ffxx address.
                fmtstr = registers.get(arg, 'h{:04x}')
                arg |= 0xff00
            # Use the bank of the current instruction so ranges that cross a
            # bank boundary are keyed correctly.
            key = get_key(offset2gbaddr(pc)[0], arg)
            if arg < 0x8000:
                # ROM labels are named after their file offset.
                arg = gbaddr2offset(*key)
            elif 0xa000 <= arg < 0xc000:
                fmtstr = 's{:04x}'
            elif 0xc000 <= arg < 0xe000:
                fmtstr = 'w{:04x}'
            symbols.setdefault(key, fmtstr.format(arg))

    # Pass 2: print the listing.
    with Z80Disassembler(args.rom, args.start, args.end) as dismbler:
        for pc, instr_name, argsize, arg in dismbler:
            here = offset2gbaddr(pc)
            if argsize != 0:
                if argsize == 2 or 'ldh' in instr_name or 'jr' in instr_name:
                    if 'jr' in instr_name:
                        arg = _jr_target(pc, arg)
                    elif 'ldh' in instr_name:
                        arg |= 0xff00
                    key = get_key(here[0], arg)
                    arg = symbols.get(key, arg)
            if here in symbols:
                name = symbols[here]
                # Names starting with '.' get one colon, all others two.
                colons = ':' if name.startswith('.') else '::'
                print(f'{name}{colons} ; {here[0]:02x}:{here[1]:04x}')
            if isinstance(arg, str):
                # Operand became a label: strip the hex format spec, the '$'
                # prefix and (for ldh) the literal "ff" page prefix so the
                # placeholder takes the bare name.
                instr_name = re.sub(r':\d*x', '', instr_name)
                instr_name = instr_name.replace('$', '')
                if 'ldh' in instr_name:
                    instr_name = instr_name.replace('ff', '')
            elif 'ldh' in instr_name:
                # No label: the template already prints the "ff" prefix.
                arg &= 0xFF
            print(f'\t{instr_name.format(arg)}')


if __name__ == '__main__':
    main()
z80table = [
# $00
('nop', 0),
('ld bc, ${:x}', 2),
('ld [bc], a', 0),
('inc bc', 0),
('inc b', 0),
('dec b', 0),
('ld b, ${:x}', 1),
('rlca', 0),
# $08
('ld [${:x}], sp', 2),
('add hl, bc', 0),
('ld a, [bc]', 0),
('dec bc', 0),
('inc c', 0),
('dec c', 0),
('ld c, ${:x}', 1),
('rrca', 0),
# $10
('stop', 0),
('ld de, ${:x}', 2),
('ld [de], a', 0),
('inc de', 0),
('inc d', 0),
('dec d', 0),
('ld d, ${:x}', 1),
('rla', 0),
# $18
('jr ${:x}\n', 1),
('add hl, de', 0),
('ld a, [de]', 0),
('dec de', 0),
('inc e', 0),
('dec e', 0),
('ld e, ${:x}', 1),
('rra', 0),
# $20
('jr nz, ${:x}', 1),
('ld hl, ${:x}', 2),
('ld [hli], a', 0),
('inc hl', 0),
('inc h', 0),
('dec h', 0),
('ld h, ${:x}', 1),
('daa', 0),
# $28
('jr z, ${:x}', 1),
('add hl, hl', 0),
('ld a, [hli]', 0),
('dec hl', 0),
('inc l', 0),
('dec l', 0),
('ld l, ${:x}', 1),
('cpl', 0),
# $30
('jr nc, ${:x}', 1),
('ld sp, ${:x}', 2),
('ld [hld], a', 0),
('inc sp', 0),
('inc [hl]', 0),
('dec [hl]', 0),
('ld [hl], ${:x}', 1),
('scf', 0),
# $38
('jr c, ${:x}', 1),
('add hl, sp', 0),
('ld a, [hld]', 0),
('dec sp', 0),
('inc a', 0),
('dec a', 0),
('ld a, ${:x}', 1),
('ccf', 0),
# $40
('ld b, b', 0),
('ld b, c', 0),
('ld b, d', 0),
('ld b, e', 0),
('ld b, h', 0),
('ld b, l', 0),
('ld b, [hl]', 0),
('ld b, a', 0),
# $48
('ld c, b', 0),
('ld c, c', 0),
('ld c, d', 0),
('ld c, e', 0),
('ld c, h', 0),
('ld c, l', 0),
('ld c, [hl]', 0),
('ld c, a', 0),
# $50
('ld d, b', 0),
('ld d, c', 0),
('ld d, d', 0),
('ld d, e', 0),
('ld d, h', 0),
('ld d, l', 0),
('ld d, [hl]', 0),
('ld d, a', 0),
# $58
('ld e, b', 0),
('ld e, c', 0),
('ld e, d', 0),
('ld e, e', 0),
('ld e, h', 0),
('ld e, l', 0),
('ld e, [hl]', 0),
('ld e, a', 0),
# $60
('ld h, b', 0),
('ld h, c', 0),
('ld h, d', 0),
('ld h, e', 0),
('ld h, h', 0),
('ld h, l', 0),
('ld h, [hl]', 0),
('ld h, a', 0),
# $68
('ld l, b', 0),
('ld l, c', 0),
('ld l, d', 0),
('ld l, e', 0),
('ld l, h', 0),
('ld l, l', 0),
('ld l, [hl]', 0),
('ld l, a', 0),
# $70
('ld [hl], b', 0),
('ld [hl], c', 0),
('ld [hl], d', 0),
('ld [hl], e', 0),
('ld [hl], h', 0),
('ld [hl], l', 0),
('halt', 0),
('ld [hl], a', 0),
# $78
('ld a, b', 0),
('ld a, c', 0),
('ld a, d', 0),
('ld a, e', 0),
('ld a, h', 0),
('ld a, l', 0),
('ld a, [hl]', 0),
('ld a, a', 0),
# $80
('add b', 0),
('add c', 0),
('add d', 0),
('add e', 0),
('add h', 0),
('add l', 0),
('add [hl]', 0),
('add a', 0),
# $88
('adc b', 0),
('adc c', 0),
('adc d', 0),
('adc e', 0),
('adc h', 0),
('adc l', 0),
('adc [hl]', 0),
('adc a', 0),
# $90
('sub b', 0),
('sub c', 0),
('sub d', 0),
('sub e', 0),
('sub h', 0),
('sub l', 0),
('sub [hl]', 0),
('sub a', 0),
# $98
('sbc b', 0),
('sbc c', 0),
('sbc d', 0),
('sbc e', 0),
('sbc h', 0),
('sbc l', 0),
('sbc [hl]', 0),
('sbc a', 0),
# $a0
('and b', 0),
('and c', 0),
('and d', 0),
('and e', 0),
('and h', 0),
('and l', 0),
('and [hl]', 0),
('and a', 0),
# $a8
('xor b', 0),
('xor c', 0),
('xor d', 0),
('xor e', 0),
('xor h', 0),
('xor l', 0),
('xor [hl]', 0),
('xor a', 0),
# $b0
('or b', 0),
('or c', 0),
('or d', 0),
('or e', 0),
('or h', 0),
('or l', 0),
('or [hl]', 0),
('or a', 0),
# $b8
('cp b', 0),
('cp c', 0),
('cp d', 0),
('cp e', 0),
('cp h', 0),
('cp l', 0),
('cp [hl]', 0),
('cp a', 0),
# $c0
('ret nz', 0),
('pop bc', 0),
('jp nz, ${:x}', 2),
('jp ${:x}\n', 2),
('call nz, ${:x}', 2),
('push bc', 0),
('add ${:x}', 1),
('rst $0', 0),
# $c8
('ret z', 0),
('ret\n', 0),
('jp z, ${:x}', 2),
('extd', 0),
('call z, ${:x}', 2),
('call ${:x}', 2),
('adc ${:x}', 1),
('rst $08', 0),
# $d0
('ret nc', 0),
('pop de', 0),
('jp nc, ${:x}', 2),
('db $d3', 0),
('call nc, ${:x}', 2),
('push de', 0),
('sub ${:x}', 1),
('rst $10', 0),
# $d8
('ret c', 0),
('reti\n', 0),
('jp c, ${:x}', 2),
('db $db', 0),
('call c, ${:x}', 2),
('db $dd', 0),
('sbc ${:x}', 1),
('rst $18', 0),
# $e0
('ldh [$ff{:02x}], a', 1),
('pop hl', 0),
('ld [$ff00+c], a', 0),
('db $e3', 0),
('db $e4', 0),
('push hl', 0),
('and ${:x}', 1),
('rst $20', 0),
# $e8
('add sp, ${:x}', 1),
('jp hl\n', 0),
('ld [${:x}], a', 2),
('db $eb', 0),
('db $ec', 0),
('db $ed', 0),
('xor ${:x}', 1),
('rst $28', 0),
# $f0
('ldh a, [$ff{:02x}]', 1),
('pop af', 0),
('ld a, [$ff00+c]', 0),
('di', 0),
('db $f4', 0),
('push af', 0),
('or ${:x}', 1),
('rst $30', 0),
# $f8
('ld hl, sp+${:x}', 1),
('ld sp, hl', 0),
('ld a, [${:x}]', 2),
('ei', 0),
('db $fc', 0),
('db $fd', 0),
('cp ${:x}', 1),
('rst $38', 0)
]
# Secondary opcode table, indexed by the byte that follows the 'extd' prefix.
# The layout is fully regular: eight operations per register for $00-$3f,
# then bit/res/set for bit numbers 0-7 per register for $40-$ff.
extdtable = [
    f'{op} {reg}'
    for op in ('rlc', 'rrc', 'rl', 'rr', 'sla', 'sra', 'swap', 'srl')
    for reg in ('b', 'c', 'd', 'e', 'h', 'l', '[hl]', 'a')
] + [
    f'{op} {bit}, {reg}'
    for op in ('bit', 'res', 'set')
    for bit in range(8)
    for reg in ('b', 'c', 'd', 'e', 'h', 'l', '[hl]', 'a')
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment