Skip to content

Instantly share code, notes, and snippets.

@matthewearl
Last active May 19, 2024 22:12
Show Gist options
  • Save matthewearl/733bba717780604813ed588d8ea7875f to your computer and use it in GitHub Desktop.
Save matthewearl/733bba717780604813ed588d8ea7875f to your computer and use it in GitHub Desktop.
Super Mario Bros Level Extractor
#!/usr/bin/env python3
#
# Copyright (c) 2018 Matthew Earl
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
# USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
Super Mario Bros level extractor
This script requires py65emu, numpy, and PIL to run. Run with no arguments to see usage.
See http://matthewearl.github.io/2018/06/28/smb-level-extractor/ for a description of how this was written.
To run you'll need to compile https://gist.github.com/1wErt3r/4048722 with x816 to obtain the PRG-ROM and symbol files.
The CHR-ROM should be extracted from a Super Mario Bros ROM, or can be read from an INES ROM file. See
https://wiki.nesdev.com/w/index.php/INES for information on the INES format. In addition you'll need a NES palette
saved in "data/ntscpalette.pal", generated using the tool here: https://bisqwit.iki.fi/utils/nespalette.php
"""
import collections
import pathlib
import re
import numpy as np
from py65emu.cpu import CPU
from py65emu.mmu import MMU
_WORKING_RAM_SIZE = 0x800
Symbol = collections.namedtuple('Symbol', ('name', 'address', 'line_num'))
class SymbolFile:
_LINE_RE = r"(?P<name>[A-Z0-9_]+) *= \$(?P<address>[A-F0-9]*) *; <> \d+, statement #(?P<line_num>\d+)"
def __init__(self, fname):
with open(fname) as f:
self._symbols = [self._parse_symbol(line) for line in f.readlines()]
self._symbols = list(sorted(self._symbols, key=lambda s: s.address))
self._name_to_addr = {s.name: s.address for s in self._symbols}
self._addr_to_name = {s.address: s.name for s in self._symbols}
def _parse_symbol(self, line):
m = re.match(self._LINE_RE, line)
return Symbol(m.group('name'), int(m.group('address'), 16), int(m.group('line_num')))
def __getitem__(self, name):
return self._name_to_addr[name]
def _read_ppu_data(mmu, addr):
while True:
ppu_high_addr = mmu.read(addr)
if ppu_high_addr == 0x0:
break
ppu_low_addr = mmu.read(addr + 1)
assert ppu_high_addr == 0x3f and ppu_low_addr == 0x00
flags_and_length = mmu.read(addr + 2)
assert (flags_and_length & (1<<7)) == 0, "32-byte increment flag set"
assert (flags_and_length & (1<<6)) == 0, "Repeating flag set"
length = flags_and_length & 0b111111
addr += 3
for i in range(length):
yield mmu.read(addr)
addr += 1
def _load_palette(mmu, sym_file, nes_palette):
area_type = mmu.read(sym_file['AREATYPE'])
idx = mmu.read(sym_file['AREAPALETTE'] + area_type)
high_addr = mmu.read(sym_file['VRAM_ADDRTABLE_HIGH'] + idx)
low_addr = mmu.read(sym_file['VRAM_ADDRTABLE_LOW'] + idx)
palette_data = list(_read_ppu_data(mmu, high_addr << 8 | low_addr))
assert len(palette_data) == 32
a = np.array(palette_data[:16]).reshape(4, 4)
a[:, 0] = mmu.read(sym_file['BACKGROUNDCOLORS'] + area_type)
return nes_palette[a]
def _execute_subroutine(cpu, addr):
s_before = cpu.r.s
cpu.JSR(addr)
while cpu.r.s != s_before:
cpu.step()
def _get_metatile_buffer(mmu, sym_file):
return [mmu.read(sym_file['METATILEBUFFER'] + i) for i in range(13)]
def load_tile(chr_rom, idx):
chr_rom_addr = 0x1000 + 16 * idx
d = chr_rom[chr_rom_addr:chr_rom_addr + 16]
a = np.array([[b & (128 >> i) != 0 for i in range(8)] for b in d]).reshape(2, 8, 8)
return a[0] + 2 * a[1]
def _render_metatile(mmu, chr_rom, mtile, palette):
palette_num = mtile >> 6
palette_idx = mtile & 0b111111
high_addr = mmu.read(sym_file['METATILEGRAPHICS_HIGH'] + palette_num)
low_addr = mmu.read(sym_file['METATILEGRAPHICS_LOW'] + palette_num)
addr = (high_addr << 8 | low_addr) + palette_idx * 4
t = np.vstack([np.hstack([load_tile(chr_rom, mmu.read(addr + c * 2 + r)) for c in range(2)])
for r in range(2)])
return palette[palette_num][t]
def load_level(stage, prg_rom, chr_rom, sym_file, nes_palette):
# Initialize the MMU / CPU
mmu = MMU([
(0x0, _WORKING_RAM_SIZE, False, []),
(0x8000, 0x10000, True, list(prg_rom))
])
cpu = CPU(mmu, 0x0)
# Execute some preamble subroutines which set up variables used by the main subroutines.
if isinstance(stage, tuple):
world_num, area_num = stage
mmu.write(sym_file['WORLDNUMBER'], world_num - 1)
mmu.write(sym_file['AREANUMBER'], area_num - 1)
_execute_subroutine(cpu, sym_file['LOADAREAPOINTER'])
else:
area_pointer = stage
mmu.write(sym_file['AREAPOINTER'], area_pointer)
mmu.write(sym_file['HALFWAYPAGE'], 0)
mmu.write(sym_file['ALTENTRANCECONTROL'], 0)
mmu.write(sym_file['PRIMARYHARDMODE'], 0)
mmu.write(sym_file['OPERMODE_TASK'], 0)
_execute_subroutine(cpu, sym_file['INITIALIZEAREA'])
# Extract the palette.
palette = _load_palette(mmu, sym_file, nes_palette)
# Repeatedly extract meta-tile columns, until the level starts repeating.
cols = []
for column_pos in range(1000):
_execute_subroutine(cpu, sym_file['AREAPARSERCORE'])
cols.append(_get_metatile_buffer(mmu, sym_file))
_execute_subroutine(cpu, sym_file['INCREMENTCOLUMNPOS'])
if len(cols) >= 96 and cols[-48:] == cols[-96:-48]:
cols = cols[:-80]
break
level = np.array(cols).T
# Render a dict of metatiles.
mtiles = {mtile: _render_metatile(mmu, chr_rom, mtile, palette)
for mtile in set(level.flatten())}
return level, mtiles
def render_level(level, mtiles):
return np.vstack([np.hstack([mtiles[mtile] for mtile in row]) for row in level])
if __name__ == "__main__":
import sys
import PIL.Image
world_map = {
'{}-{}'.format(world_num, area_num): (world_num, area_num)
for world_num in range(1, 9)
for area_num in range(1, 5)
}
world_map.update({
'bonus': 0xc2,
'cloud1': 0x2b,
'cloud2': 0x34,
'water1': 0x00,
'water2': 0x02,
'warp': 0x2f,
})
if len(sys.argv) < 6:
print("Usage: {} <world> <prg-rom> <sym-file> <chr-rom> <out-file>".format(sys.argv[0]), file=sys.stderr)
print(" <world> is one of {}".format(', '.join(sorted(world_map.keys()))), file=sys.stderr)
print(" <prg-rom> is the binary output from x816")
print(" <sym-file> is the sym file output from x816")
print(" <chr-rom> is a CHR-ROM dump")
print(" <out-file> is the output image name")
sys.exit(-1)
stage = world_map[sys.argv[1]]
with open(sys.argv[2], 'rb') as f:
prg_rom = f.read()
sym_file = SymbolFile(sys.argv[3])
with open(sys.argv[4], 'rb') as f:
chr_rom = f.read()
out_fname = sys.argv[5]
with (pathlib.Path(sys.argv[0]).parent / "data" / "ntscpalette.pal").open("rb") as f:
nes_palette = np.array(list(f.read())).reshape(64, 3)
level, mtiles = load_level(stage, prg_rom, chr_rom, sym_file, nes_palette)
a = render_level(level, mtiles).astype(np.uint8)
im = PIL.Image.fromarray(a)
im.save(out_fname)
@lumpman2
Copy link

Since I ran the program correctly this time, I will elaborate on the problem with outputting the levels 2-3, 7-3, and 3-1 (of which the former two are identical except for enemy differences). First of all - and I am honestly unsure if the problem is with the program or the emulated CPU/MMU - this program outputs, for instance, the aboveground 1-2 instead of the main level and inputting "1-3" into the program gives the actual 1-2. "1-4" then gives 1-3 and there is no way to get 1-4 without modifying the program slightly ("1-5" is not allowed).

However, even after shifting the numbers a bit in this program, it still fails to run with the 3 aforementioned levels with this error:

Traceback (most recent call last):
File "C:\Users\lumpm\Desktop\smbnn\smblevextract.py", line 231, in
level, mtiles = load_level(stage, prg_rom, chr_rom, sym_file, nes_palette)
File "C:\Users\lumpm\Desktop\smbnn\smblevextract.py", line 172, in load_level
_execute_subroutine(cpu, sym_file['AREAPARSERCORE'])
File "C:\Users\lumpm\Desktop\smbnn\smblevextract.py", line 111, in _execute_subroutine
cpu.step()
File "C:\Users\lumpm\AppData\Local\Programs\Python\Python310\lib\site-packages\py65emu\cpu.py", line 100, in step
self.ops[opcode]()
File "C:\Users\lumpm\AppData\Local\Programs\Python\Python310\lib\site-packages\py65emu\cpu.py", line 590, in f
op_f(a_f())
File "C:\Users\lumpm\AppData\Local\Programs\Python\Python310\lib\site-packages\py65emu\cpu.py", line 225, in a
return self.mmu.read(self.a_a())
File "C:\Users\lumpm\AppData\Local\Programs\Python\Python310\lib\site-packages\py65emu\mmu.py", line 118, in read
b = self.getBlock(addr)
File "C:\Users\lumpm\AppData\Local\Programs\Python\Python310\lib\site-packages\py65emu\mmu.py", line 94, in getBlock
raise IndexError
IndexError

I attempted to change area_num - 1 and world_num - 1 to avoid the off-by-one error, and oddly enough it worked for world 1 (which has an underground section with the pipe intro) but not for worlds 2 and 7 which also have pipe intro sections. I believe it has something to do with the fact that in the disassembly for SMB1 (at the given link), the worlds with the extra pipe intro have their own address under AreaAddrOffsets which brings the number of "areas" per world to 5 instead of 4, but I'm not aware of any way to monitor or debug that with most of the data work being done by the emulated CPU and not by this program. As it stands the program has been very useful and has worked for every other level (I just needed the metatile data) so I think missing effectively 2 levels is not significant, but I still wonder if there is any insight as to why it does not work for those.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment