Skip to content

Instantly share code, notes, and snippets.

@PikalaxALT
Created March 9, 2017 03:48
Show Gist options
  • Save PikalaxALT/6d068f6b465dc6010e5778b8221fa3b5 to your computer and use it in GitHub Desktop.
Save PikalaxALT/6d068f6b465dc6010e5778b8221fa3b5 to your computer and use it in GitHub Desktop.
Script for disassembling Game Boy Z80 machine code.
#!/usr/bin/python3
import re, sys, argparse, random
# Base addresses used by get_next_instruction to recognise 16-bit operands
# that point into the tile map / attribute map buffers.  Both are set to
# -1000 here; operands are decoded as unsigned integers, so the
# range(TileMap, ...) / range(AttrMap, ...) checks can never match with this
# value, which appears to disable the coord rewriting.  The trailing hex
# comments are presumably the real addresses to restore -- TODO confirm.
TileMap = -1000 # 0xc3a0
AttrMap = -1000 # 0xccd9
# Width and height of the map buffers: an operand offset is split with
# divmod(offset, SCREEN_WIDTH) into (y, x), and each buffer is treated as
# SCREEN_WIDTH * SCREEN_HEIGHT bytes long.
SCREEN_WIDTH = 20
SCREEN_HEIGHT = 18
# Primary opcode table, indexed by the opcode byte (256 entries, eight per
# row; the "# $xx" markers give the opcode of the first entry in each row).
# Each entry is a (template, operand_size) pair:
#   * template     - mnemonic text; "{:x}" / "{:02x}" placeholders are filled
#                    with the operand (or replaced by a label) in
#                    get_next_instruction.
#   * operand_size - number of operand bytes that follow the opcode (0, 1, 2).
# Templates ending in "\n" (jr, jp, ret, reti, jp [hl]) get an extra blank
# line after them in the output, because disassemble_chunk appends its own
# "\n" to every instruction.
# "extd" is a sentinel: get_next_instruction reads one more byte and looks the
# instruction up in extdtable instead.
# Several opcodes are rendered as a literal "db $xx" byte.
z80table = [
# $00
("nop", 0), ("ld bc, ${:x}", 2), ("ld [bc], a", 0), ("inc bc", 0), ("inc b", 0), ("dec b", 0), ("ld b, ${:x}", 1), ("rlca", 0),
# $08
("ld [${:x}], sp", 2), ("add hl, bc", 0), ("ld a, [bc]", 0), ("dec bc", 0), ("inc c", 0), ("dec c", 0), ("ld c, ${:x}", 1), ("rrca", 0),
# $10
("stop", 0), ("ld de, ${:x}", 2), ("ld [de], a", 0), ("inc de", 0), ("inc d", 0), ("dec d", 0), ("ld d, ${:x}", 1), ("rla", 0),
# $18
("jr ${:x}\n", 1), ("add hl, de", 0), ("ld a, [de]", 0), ("dec de", 0), ("inc e", 0), ("dec e", 0), ("ld e, ${:x}", 1), ("rra", 0),
# $20
("jr nz, ${:x}", 1), ("ld hl, ${:x}", 2), ("ld [hli], a", 0), ("inc hl", 0), ("inc h", 0), ("dec h", 0), ("ld h, ${:x}", 1), ("daa", 0),
# $28
("jr z, ${:x}", 1), ("add hl, hl", 0), ("ld a, [hli]", 0), ("dec hl", 0), ("inc l", 0), ("dec l", 0), ("ld l, ${:x}", 1), ("cpl", 0),
# $30
("jr nc, ${:x}", 1), ("ld sp, ${:x}", 2), ("ld [hld], a", 0), ("inc sp", 0), ("inc [hl]", 0), ("dec [hl]", 0), ("ld [hl], ${:x}", 1), ("scf", 0),
# $38
("jr c, ${:x}", 1), ("add hl, sp", 0), ("ld a, [hld]", 0), ("dec sp", 0), ("inc a", 0), ("dec a", 0), ("ld a, ${:x}", 1), ("ccf", 0),
# $40
("ld b, b", 0), ("ld b, c", 0), ("ld b, d", 0), ("ld b, e", 0), ("ld b, h", 0), ("ld b, l", 0), ("ld b, [hl]", 0), ("ld b, a", 0),
# $48
("ld c, b", 0), ("ld c, c", 0), ("ld c, d", 0), ("ld c, e", 0), ("ld c, h", 0), ("ld c, l", 0), ("ld c, [hl]", 0), ("ld c, a", 0),
# $50
("ld d, b", 0), ("ld d, c", 0), ("ld d, d", 0), ("ld d, e", 0), ("ld d, h", 0), ("ld d, l", 0), ("ld d, [hl]", 0), ("ld d, a", 0),
# $58
("ld e, b", 0), ("ld e, c", 0), ("ld e, d", 0), ("ld e, e", 0), ("ld e, h", 0), ("ld e, l", 0), ("ld e, [hl]", 0), ("ld e, a", 0),
# $60
("ld h, b", 0), ("ld h, c", 0), ("ld h, d", 0), ("ld h, e", 0), ("ld h, h", 0), ("ld h, l", 0), ("ld h, [hl]", 0), ("ld h, a", 0),
# $68
("ld l, b", 0), ("ld l, c", 0), ("ld l, d", 0), ("ld l, e", 0), ("ld l, h", 0), ("ld l, l", 0), ("ld l, [hl]", 0), ("ld l, a", 0),
# $70
("ld [hl], b", 0), ("ld [hl], c", 0), ("ld [hl], d", 0), ("ld [hl], e", 0), ("ld [hl], h", 0), ("ld [hl], l", 0), ("halt", 0), ("ld [hl], a", 0),
# $78
("ld a, b", 0), ("ld a, c", 0), ("ld a, d", 0), ("ld a, e", 0), ("ld a, h", 0), ("ld a, l", 0), ("ld a, [hl]", 0), ("ld a, a", 0),
# $80
("add b", 0), ("add c", 0), ("add d", 0), ("add e", 0), ("add h", 0), ("add l", 0), ("add [hl]", 0), ("add a", 0),
# $88
("adc b", 0), ("adc c", 0), ("adc d", 0), ("adc e", 0), ("adc h", 0), ("adc l", 0), ("adc [hl]", 0), ("adc a", 0),
# $90
("sub b", 0), ("sub c", 0), ("sub d", 0), ("sub e", 0), ("sub h", 0), ("sub l", 0), ("sub [hl]", 0), ("sub a", 0),
# $98
("sbc b", 0), ("sbc c", 0), ("sbc d", 0), ("sbc e", 0), ("sbc h", 0), ("sbc l", 0), ("sbc [hl]", 0), ("sbc a", 0),
# $a0
("and b", 0), ("and c", 0), ("and d", 0), ("and e", 0), ("and h", 0), ("and l", 0), ("and [hl]", 0), ("and a", 0),
# $a8
("xor b", 0), ("xor c", 0), ("xor d", 0), ("xor e", 0), ("xor h", 0), ("xor l", 0), ("xor [hl]", 0), ("xor a", 0),
# $b0
("or b", 0), ("or c", 0), ("or d", 0), ("or e", 0), ("or h", 0), ("or l", 0), ("or [hl]", 0), ("or a", 0),
# $b8
("cp b", 0), ("cp c", 0), ("cp d", 0), ("cp e", 0), ("cp h", 0), ("cp l", 0), ("cp [hl]", 0), ("cp a", 0),
# $c0
("ret nz", 0), ("pop bc", 0), ("jp nz, ${:x}", 2), ("jp ${:x}\n", 2), ("call nz, ${:x}", 2), ("push bc", 0), ("add ${:x}", 1), ("rst $0", 0),
# $c8
("ret z", 0), ("ret\n", 0), ("jp z, ${:x}", 2), ("extd", 0), ("call z, ${:x}", 2), ("call ${:x}", 2), ("adc ${:x}", 1), ("rst $08", 0),
# $d0
("ret nc", 0), ("pop de", 0), ("jp nc, ${:x}", 2), ("db $d3", 0), ("call nc, ${:x}", 2), ("push de", 0), ("sub ${:x}", 1), ("rst $10", 0),
# $d8
("ret c", 0), ("reti\n", 0), ("jp c, ${:x}", 2), ("db $db", 0), ("call c, ${:x}", 2), ("db $dd", 0), ("sbc ${:x}", 1), ("rst $18", 0),
# $e0
("ld [$ff{:02x}], a", 1), ("pop hl", 0), ("ld [$ff00+c], a", 0), ("db $e3", 0), ("db $e4", 0), ("push hl", 0), ("and ${:x}", 1), ("rst $20", 0),
# $e8
("add sp, ${:x}", 1), ("jp [hl]\n", 0), ("ld [${:x}], a", 2), ("db $eb", 0), ("db $ec", 0), ("db $ed", 0), ("xor ${:x}", 1), ("rst $28", 0),
# $f0
("ld a, [$ff{:02x}]", 1), ("pop af", 0), ("ld a, [$ff00+c]", 0), ("di", 0), ("db $f4", 0), ("push af", 0), ("or ${:x}", 1), ("rst $30", 0),
# $f8
("ld hl, sp+${:x}", 1), ("ld sp, hl", 0), ("ld a, [${:x}]", 2), ("ei", 0), ("db $fc", 0), ("db $fd", 0), ("cp ${:x}", 1), ("rst $38", 0)
]
# Mnemonics for the second byte of an "extd" (prefixed) instruction, indexed
# by that byte.  The table is fully regular, so it is generated rather than
# spelled out:
#   $00-$3f  eight rotate/shift/swap operations x eight operands
#   $40-$ff  bit / res / set, each for bit numbers 0-7 x eight operands
# Within every group of eight the operand order is b, c, d, e, h, l, [hl], a.
extdtable = [
    "{} {}".format(operation, operand)
    for operation in ("rlc", "rrc", "rl", "rr", "sla", "sra", "swap", "srl")
    for operand in ("b", "c", "d", "e", "h", "l", "[hl]", "a")
] + [
    "{} {}, {}".format(operation, bit_number, operand)
    for operation in ("bit", "res", "set")
    for bit_number in range(8)
    for operand in ("b", "c", "d", "e", "h", "l", "[hl]", "a")
]
def try_int(value):
    """Parse a command-line number given in decimal or hexadecimal.

    The value is tried as decimal first and as hexadecimal second, so "100"
    is one hundred while "1ff" and "0x100" are hexadecimal.  A leading "$"
    (the hex notation this tool itself prints) forces hexadecimal.

    Args:
        value: A string such as "16384", "4000h"-less hex like "4000"/"ff",
            "0x4000" or "$4000"; non-string values are passed to int().

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the text is neither valid decimal nor valid hex.
    """
    if not isinstance(value, str):
        return int(value)
    text = value.strip()
    if text.startswith("$"):
        return int(text[1:], 16)
    try:
        return int(text)
    except ValueError:
        # Not decimal: fall back to base 16 (also accepts a "0x" prefix).
        return int(text, 16)
def parse_sym_fname(symfname, constants):
    """Build the symbol dictionary from a symbol file and constant files.

    Args:
        symfname: An open text file (any iterable of lines) whose lines look
            like ``BB:AAAA Label`` with a two-digit hex bank and four-digit
            hex address.  Lines that do not match are ignored, as are labels
            containing a ".".
        constants: Optional iterable of paths to assembly files.  Lines of
            the form ``NAME EQU $hex`` are added under bank 0 when the value
            is at or above $ff00, or when NAME starts with "MBC" or "HuC".

    Returns:
        A dict mapping ``(bank, address)`` tuples to lists of label names.
        It always contains ``None: [None]`` so that a lookup with the key
        ``None`` yields no label.
    """
    # Hex digits are accepted in either case so that both upper- and
    # lower-case symbol files parse.
    sym_line = re.compile(
        r"(?P<bank>[0-9A-Fa-f]{2}):(?P<addr>[0-9A-Fa-f]{4}) (?P<label>\S+)$")
    symbols = {None: [None]}

    for line in symfname:
        found = sym_line.match(line)
        if found is None:
            continue
        bank, addr, label = found.groups()
        if "." in label:
            continue
        symbols.setdefault((int(bank, 16), int(addr, 16)), []).append(label)

    for fname in constants or ():
        with open(fname, "r") as handle:
            for line in handle:
                if " EQU " not in line:
                    continue
                fields = line.split()
                # Need "NAME EQU $value"; anything shorter or with a
                # non-literal value is not something we can place.
                if len(fields) < 3 or not fields[2].startswith("$"):
                    continue
                name = fields[0]
                try:
                    value = int(fields[2][1:], 16)
                except ValueError:
                    # e.g. "$ff00+1": an expression, not a plain hex literal.
                    continue
                if value < 0xff00 and not name.startswith(("MBC", "HuC")):
                    continue
                if "." in name:
                    continue
                symbols.setdefault((0, value), []).append(name)

    # HRAM_START / HRAM_END are dropped so they are never chosen as the label
    # for $ff80 / $ffff.  The addresses may have no entry at all, and an
    # entry emptied by the removal is deleted rather than left as [].
    for key, marker in (((0, 0xFF80), "HRAM_START"), ((0, 0xFFFF), "HRAM_END")):
        names = symbols.get(key)
        if names and marker in names:
            names.remove(marker)
            if not names:
                del symbols[key]
    return symbols
def get_symbol(bank, addr, symbols):
    """Return the label known for ``addr`` as seen from ``bank``, or None.

    The address decides which bank the lookup uses:
      * below $4000 (except $0000 and $2000): bank 0
      * below $8000 (except $4000 and $6000): the given ``bank``
      * $c000-$cfff: bank 0
      * $d000-$dfff: bank 1
      * $ff00 and above: bank 0
      * anything else: no lookup key (``None``)

    Args:
        bank: Bank the referencing code lives in.
        addr: 16-bit address to look up.
        symbols: Dict mapping ``(bank, address)`` to a list of labels.

    Returns:
        The first label registered for the address, or None when there is
        none.  The choice is deterministic so that an address is given the
        same name at its definition and at every reference, and so that
        repeated runs produce identical output.
    """
    if addr < 0x4000 and addr not in (0x0000, 0x2000):
        key = (0, addr)
    elif addr < 0x8000 and addr not in (0x4000, 0x6000):
        key = (bank, addr)
    elif 0xc000 <= addr < 0xd000:
        key = (0, addr)
    elif 0xd000 <= addr < 0xe000:
        key = (1, addr)
    elif addr >= 0xff00:
        key = (0, addr)
    else:
        key = None

    names = symbols.get(key)
    if not names:
        # Unknown address, or an entry whose label list is empty.
        return None
    if isinstance(names, str):
        # Tolerate an entry stored as a bare string instead of a list.
        return names
    return names[0]
def _split_offset(pos):
    """Convert a ROM file offset into a ``(bank, address)`` pair."""
    bank, addr = divmod(pos, 0x4000)
    if bank:
        # Every bank other than 0 is addressed at $4000-$7fff.
        addr += 0x4000
    return bank, addr


def _ensure_label(bank, addr, symbols):
    """Return the label for ``bank:addr``, registering ``Func_<offset>`` if none is known."""
    label = get_symbol(bank, addr, symbols)
    if not label:
        label = 'Func_{:x}'.format((bank << 14) | (addr & 0x3fff))
        symbols[(bank, addr)] = [label]
    return label


def disassemble_chunk(fnamename, start, end, outfname, symbols):
    """Disassemble the ROM bytes in ``[start, end)`` and write the listing.

    Besides decoding plain instructions this pass:
      * registers the labels that get_next_instruction proposes for jump and
        call targets;
      * turns ``call StackFarCall`` plus its three inline operand bytes
        (address, bank) into a ``farcall`` line;
      * merges ``ld a, $bank`` / ``ld hl, addr`` / ``call ...FarCall...``
        into one ``callba`` / ``callab`` line (with a ``_bank1`` suffix when
        the call target's name contains "Bank1");
      * treats the bytes after ``jp [hl]`` as a table of 16-bit pointers,
        emitted as ``dw`` lines until an address that already has a label.

    Args:
        fnamename: Path of the ROM file.
        start: First file offset to disassemble (int, or decimal/hex string).
        end: Offset one past the last byte (int, or decimal/hex string).
            Clamped to the file size.
        outfname: Output path, or "-" for standard output.
        symbols: Dict mapping ``(bank, address)`` to label lists; new labels
            discovered here are added to it in place.
    """
    try:
        start, end = int(start), int(end)
    except ValueError:
        # Not both decimal: read the textual bounds as hexadecimal.  Values
        # that are already ints are left alone.
        start, end = (v if isinstance(v, int) else int(v, 16)
                      for v in (start, end))

    # Bank-relative bounds; passed through to get_next_instruction.
    S = start & 0x3fff
    E = end & 0x3fff
    if start >= 0x4000:
        S += 0x4000
        E += 0x4000

    labels = {}  # file offset -> text emitted at that offset
    with open(fnamename, 'rb') as F:
        # Reads at end-of-file return no data and do not advance the file
        # position, so never try to decode beyond the file.
        F.seek(0, 2)
        end = min(end, F.tell())
        F.seek(start)
        while F.tell() < end:
            pos = F.tell()
            bank, addr = _split_offset(pos)
            instr, param_pos = get_next_instruction(F, symbols, bank, addr, S, E)
            labels[pos] = "\t" + instr + "\n"

            if param_pos:
                # A branch/call target: make sure it has a label to print.
                _bank, _addr, _label = param_pos
                if (_bank, _addr) not in symbols:
                    symbols[(_bank, _addr)] = [_label]

            elif 'call StackFarCall' in instr:
                # The target address and bank follow the call inline.
                operand = F.read(3)
                if len(operand) == 3:
                    faddr = int.from_bytes(operand[:2], 'little')
                    fbank = operand[2]
                    labels[pos] = '\tfarcall {}\n'.format(
                        _ensure_label(fbank, faddr, symbols))

            elif 'FarCall' in instr:
                try:
                    if pos - 3 in labels:
                        # ld a, $bank ; ld hl, addr ; call
                        faddr, = re.match(r'\tld hl, (\S+)$', labels[pos - 3]).groups()
                        fbank, = re.match(r'\tld a, \$(\S+)', labels[pos - 5]).groups()
                        callba = 'callba'
                    else:
                        # ld hl, addr ; ld a, $bank ; call
                        fbank, = re.match(r'\tld a, \$(\S+)', labels[pos - 2]).groups()
                        faddr, = re.match(r'\tld hl, (\S+)$', labels[pos - 5]).groups()
                        callba = 'callab'
                except (AttributeError, KeyError):
                    # The preceding instructions are not the expected pair;
                    # leave the plain call as it is.
                    continue
                if 'Bank1' in instr:
                    callba += '_bank1'
                fbank = int(fbank, 16)

                if faddr.startswith('$'):
                    faddr = int(faddr[1:], 16)
                else:
                    # The operand was already replaced by a label that was
                    # resolved in the current bank: map it back to an address.
                    target = None
                    for key, names in symbols.items():
                        if (key is not None and key[0] == bank
                                and isinstance(names, list) and faddr in names):
                            target = key[1]
                            break
                    if target is None:
                        # Cannot recover the address; keep the three
                        # instructions unmerged.
                        continue
                    faddr = target

                farlab = _ensure_label(fbank, faddr, symbols)
                labels[pos - 5] = '\t{} {}\n'.format(callba, farlab)
                labels.pop(pos - 3 if 'callba' in callba else pos - 2)
                labels.pop(pos)

            elif instr == 'jp [hl]\n':
                # Read pointer entries until an address that already has a
                # label, or until the file runs out.
                table_start = F.tell()
                entry_pos = table_start
                tbank, taddr = _split_offset(entry_pos)
                while not get_symbol(tbank, taddr, symbols):
                    raw = F.read(2)
                    if len(raw) < 2:
                        break
                    dest = int.from_bytes(raw, 'little')
                    labels[entry_pos] = '\tdw {}\n'.format(
                        _ensure_label(tbank, dest, symbols))
                    entry_pos = F.tell()
                    tbank, taddr = _split_offset(entry_pos)
                if entry_pos > table_start:
                    # Blank line after the last entry, and a name for the
                    # table.  Skipped when no entry was read, so an existing
                    # label right after the jp [hl] is not overwritten.
                    labels[entry_pos - 2] += '\n'
                    symbols[_split_offset(table_start)] = [
                        'Pointers_{:x}'.format(table_start)]

    lines = []
    for pos in range(start, end):
        bank, addr = _split_offset(pos)
        cur_sym = get_symbol(bank, addr, symbols)
        if cur_sym:
            if cur_sym.startswith("."):
                # Local label: no colon and no address comment.
                lines.append(cur_sym + "\n")
            else:
                lines.append(cur_sym + ": ; {:x} ({:x}:{:04x})\n".format(pos, bank, addr))
        if pos in labels:
            lines.append(labels[pos])

    text = "".join(lines)
    if outfname == "-":
        sys.stdout.write(text)
    else:
        with open(outfname, 'w') as O:
            O.write(text)
def _coord_macro(instr, suffix=""):
    """Return the coord-macro template replacing a 16-bit load, or None if ``instr`` is not one."""
    if instr.startswith(("ld hl, ", "ld bc, ", "ld de, ")):
        # instr[3:5] is the register pair: "hlcoord", "bccoord", "decoord".
        return instr[3:5] + "coord ${:x}" + suffix
    if instr.startswith("ld a, "):
        return "aCoord ${:x}" + suffix
    if instr.startswith("ld ["):
        return "Coorda ${:x}" + suffix
    return None


def get_next_instruction(open_fname, symbols, bank, addr, S, E):
    """Decode one instruction from the current position of ``open_fname``.

    Args:
        open_fname: ROM file opened in binary mode, positioned at an opcode.
            The opcode and its operand bytes are consumed.
        symbols: Dict mapping ``(bank, address)`` to label lists.  A
            generated ``H_xxxx`` name may be added to it.
        bank: Bank of the instruction being decoded.
        addr: Address of the instruction within that bank.
        S: Unused; kept for interface compatibility.
        E: Unused; kept for interface compatibility.

    Returns:
        ``(text, param_pos)``.  ``text`` is the instruction with its operand
        rendered as a number, a label, ``label + n`` or a coord macro.
        ``param_pos`` is ``None`` or a ``(bank, address, proposed_label)``
        tuple describing a jump/call target the caller may want to label.
    """
    instr, size = z80table[int.from_bytes(open_fname.read(1), 'little')]
    param_pos = None
    if instr == "extd":
        # Prefixed instruction: the next byte selects the real mnemonic.
        return extdtable[int.from_bytes(open_fname.read(1), 'little')], param_pos
    if not size:
        return instr, param_pos

    param = int.from_bytes(open_fname.read(size), 'little')

    if instr.startswith("jr"):
        # Signed 8-bit displacement relative to the next instruction.
        if param & 0x80:
            param -= 0x100
        param += addr + 2
        local = ".asm_{:04x}".format((bank * 0x4000) + (param & 0x3fff))
        param_pos = (bank, param, local)
        param = get_symbol(bank, param, symbols) or local

    elif "[$ff" in instr:
        # High-memory access: use the first known name that is not one of
        # the generated H_xxxx names.
        for sym in symbols.get((0, 0xff00 | param), ()):
            if not sym.startswith('H_'):
                param = sym
                break

    elif size == 1 and "sp" in instr:
        # Signed offset from sp.
        if param > 0x7f:
            instr = instr.replace('+', '')
            param = "-${:x}".format(0x100 - param)
        else:
            param = "${:x}".format(param)

    elif size == 2:
        high = param >> 8
        if 0x98 <= high < 0xa0:
            # $9800-$9bff / $9c00-$9fff loaded into a register pair become
            # bgcoord macros (the second range with ", VWindow").
            reg_load = re.match(r'ld (bc|de|hl),', instr)
            if reg_load:
                row, col = divmod(param & 0x3ff, 0x20)
                instr = '{}bgcoord {}, {}'.format(reg_load.group(1), col, row)
                if high >= 0x9c:
                    instr += ', VWindow'
                param = None

        elif ("ld" in instr) and ("a" in instr) and ((param & 0xff00) == 0xff00):
            # 16-bit load/store of a to/from $ffxx: use an H_xxxx name,
            # creating it if necessary.
            prm = (0, param)
            for sym in symbols.get(prm, ()):
                if sym.startswith('H_'):
                    param = sym
                    break
            if isinstance(param, int):
                param = 'H_{:04X}'.format(param)
                # Stored in the label list like every other symbol.
                symbols.setdefault(prm, []).append(param)

        else:
            is_branch = "call" in instr or "jp" in instr
            if instr.startswith(("ld [", "ld a,")):
                try:
                    param = symbols[(0, param)][0]
                except (KeyError, IndexError):
                    pass

            if isinstance(param, int):
                label = get_symbol(bank, param, symbols)
                if label:
                    param = label
                elif param >= 0x8000 and not is_branch:
                    # Data address without a label of its own: express it
                    # relative to a label up to 9 bytes before it.
                    for offset in range(1, 10):
                        label = get_symbol(bank, param - offset, symbols)
                        if label:
                            param = "{} + {}".format(label, offset)
                            break

            if isinstance(param, int):
                if is_branch:
                    if bank or (param < 0x4000):
                        # NOTE(review): the proposal is keyed by the current
                        # bank even for targets below $4000, which
                        # get_symbol looks up under bank 0.
                        rom_offset = (bank * 0x4000) * ((param & 0x4000) != 0) + (param & 0x3fff)
                        param_pos = (bank, param, "Func_{:04x}".format(rom_offset))
                        param = param_pos[2]
                else:
                    for base, suffix in ((TileMap, ""), (AttrMap, ", wAttrMap")):
                        if param in range(base, base + SCREEN_WIDTH * SCREEN_HEIGHT):
                            macro = _coord_macro(instr, suffix)
                            # Only rewrite the operand when the instruction
                            # actually has a coord-macro form.
                            if macro is not None:
                                y, x = divmod(param - base, SCREEN_WIDTH)
                                instr = macro
                                param = "{}, {}".format(x, y)
                            break
            elif param in ("wTileMap", "wAttrMap"):
                macro = _coord_macro(
                    instr, ", wAttrMap" if param == "wAttrMap" else "")
                if macro is not None:
                    instr = macro
                    param = "0, 0"

    if param is None:
        pass
    elif isinstance(param, str):
        # Swap the "${:x}" / "$ff{:02x}" placeholder for the text.  A
        # callable replacement inserts the text literally.
        instr = re.sub(r"\$(ff)?\{.*?\}", lambda _match: param, instr)
    else:
        instr = instr.format(param)
    return instr, param_pos
if __name__ == "__main__":
    # Command-line entry point: disassemble the byte range [start, end) of a
    # ROM file and write the listing to a file or to standard output.
    parser = argparse.ArgumentParser(
        description="Disassemble a byte range of a Game Boy ROM.")
    parser.add_argument("start", help="first ROM offset (decimal or hex)")
    parser.add_argument("end", help="offset one past the last byte (decimal or hex)")
    # The ROM is mandatory; without it open() would fail with a TypeError.
    parser.add_argument("-r", dest="romfname", required=True, help="ROM file to read")
    # "-" is what disassemble_chunk treats as standard output.
    parser.add_argument("-o", dest="outfname", default="-",
                        help="output file, or - for standard output (default)")
    parser.add_argument("-s", dest="symfname", default=None,
                        help="symbol file with 'BB:AAAA Label' lines")
    # Constant files are only read together with a symbol file (-s).
    parser.add_argument("-c", dest="constants", nargs="+",
                        help="assembly files with 'NAME EQU $hex' constants")
    args = parser.parse_args()

    try:
        start = try_int(args.start)
        end = try_int(args.end)
    except ValueError:
        parser.error("start and end must be decimal or hexadecimal integers")

    if args.symfname:
        with open(args.symfname, 'r') as SYM:
            symbols = parse_sym_fname(SYM, args.constants)
    else:
        symbols = {}
    disassemble_chunk(args.romfname, start, end, args.outfname, symbols)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment