Skip to content

Instantly share code, notes, and snippets.

@i30817
Created May 10, 2018 15:49
Show Gist options
  • Save i30817/96674a3de8d9e4cb890e92cec3f36990 to your computer and use it in GitHub Desktop.
Save i30817/96674a3de8d9e4cb890e92cec3f36990 to your computer and use it in GitHub Desktop.
dump the serial/id of a rom.
#!/usr/bin/python3
import os.path
import hashlib
import signal
import sys
import re
import io
import traceback
import argparse
import json
# Third-party dependencies. Each one is imported defensively so that a missing
# package produces an installation hint instead of a bare traceback.
try:
    import gi
    # require_version raises ValueError when the Mirage typelib is unavailable.
    gi.require_version('Mirage', '3.1')
except (ImportError, ValueError):
    print("Please install PyGObject (often called python-gobject) and libmirage from cdemu http://cdemu.sourceforge.net/ to use this program", file=sys.stderr)
    sys.exit(1)
try:
    import pycdlib
except Exception:
    print("Please install pycdlib at least 1.4.0. You can install the most recent version with 'pip install --user https://github.com/clalancette/pycdlib/archive/master.zip', if 'pip install --user pycdlib' doesn't install 1.4.0", file=sys.stderr)
    sys.exit(1)
from gi.repository import Mirage
# System identifiers accepted for each console family.
# Upper-case entries are values read from the disc itself; lower-case entries
# are the ids accepted by the --force command line option.
# For the PlayStation, PS1 and PS2 discs often carry the same system
# identifier, so the boot file has to be parsed to tell them apart.
PS_SYSTEM_IDS = ["PLAYSTATION", "PSX", "psx", "ps2"]
SAT_SYSTEM_IDS = ["SEGA SEGASATURN", "ss"]
SCD_SYSTEM_IDS = ["SEGADISCSYSTEM", "scd"]
DC_SYSTEM_IDS = ["SEGA SEGAKATANA", "dc"]
# Every id that may be passed to --force.
FORCE_SYSTEM_IDS = ["psx", "ps2", "ss", "scd", "dc"]
# This class pretends that MODE2/FORM2 sectors are MODE2/FORM1 sectors.
# It would be possible to read the right bytes by memoizing offsets, but
# pycdlib reacts very badly to mixed mode tracks. It is simpler to truncate
# every sector to 2048 bytes and hope the files we need never live on FORM2
# sectors (on the PS1 those are basically only used for music and video).
# An image being mixed mode (MODE2/2352) does not imply it has FORM2 sectors.
class MirageFileWrapper (object):
    """Read-only, seekable file-like view of a disc image's first data track.

    Exposes ``seek``/``tell``/``read`` so that pycdlib can open a libMirage
    image as if it were a plain 2048-bytes-per-sector ISO file.

    Attributes:
        pos: current byte offset of the emulated file.
        length: size in bytes of the emulated file.
        disc: the libMirage disc object loaded from the image.
        track: the data track sectors are read from.
    """

    # Bytes of user data exposed per sector.
    SECTOR_SIZE = 2048

    def __init__(self, image_files):
        """Load *image_files* (a list of paths) and locate the data track.

        Raises:
            Exception: if the image contains no Mode 1 / Mode 2 data track.
        """
        self.pos = 0
        self.length = 0
        # Open the image file(s) using libMirage
        self.mirage_context = Mirage.Context()
        self.disc = self.mirage_context.load_image(image_files)
        # Find the first Mode 1, Mode 2 Form 1, or Mode 2 Mixed track
        data_types = (
            Mirage.SectorType.MODE1,
            Mirage.SectorType.MODE2_FORM1,
            Mirage.SectorType.MODE2_MIXED,
        )
        self.track = None
        for index in range(self.disc.get_number_of_tracks()):
            track = self.disc.get_track_by_index(index)
            if track.get_sector_type() in data_types:
                self.track = track
                # read() addresses sectors absolutely from 0, so the first
                # data track is the one we want; stop at the first hit.
                break
        # Validate
        if self.track is None:
            raise Exception("Failed to find Mode1/Mode 2 Form 1/Mode 2 Form 2 track!")
        self.length = (self.track.layout_get_start_sector() + self.track.layout_get_length()) * self.SECTOR_SIZE

    def seek(self, pos, whence=os.SEEK_SET):
        """Move the read position and return the new absolute offset.

        Raises:
            ValueError: on an unknown *whence* or a negative resulting offset.
        """
        if whence == os.SEEK_SET:
            new_pos = pos
        elif whence == os.SEEK_CUR:
            new_pos = self.pos + pos
        elif whence == os.SEEK_END:
            # Relative to the end of the file, not to the current position.
            new_pos = self.length + pos
        else:
            raise ValueError("invalid whence value %r" % (whence,))
        if new_pos < 0:
            raise ValueError("negative seek position %r" % (new_pos,))
        self.pos = new_pos
        return self.pos

    def tell(self):
        """Return the current byte offset."""
        return self.pos

    def read(self, length=-1):
        """Read up to *length* bytes from the current position.

        A negative or ``None`` length reads to the end of the track. Reads
        are clamped at the end of the emulated file, so an empty bytes object
        is returned at EOF.
        """
        remaining = max(self.length - self.pos, 0)
        if length is None or length < 0 or length > remaining:
            length = remaining
        chunks = []
        # A read request may span multiple sectors.
        while length > 0:
            pos_sector, pos_offset = divmod(self.pos, self.SECTOR_SIZE)
            # Get sector - for simplicity, we use absolute addressing,
            # with implicit assumption that we are using first track...
            # (for relative addressing, we might have to add the pregap)
            sector = self.track.get_sector(pos_sector, True)
            # get_data() returns a (status, data) pair.
            sector_data = sector.get_data()[1]
            # Select the part of data we're interested in,
            # truncate the rest if any (MODE2/FORM2 sectors)
            tmp_len = min(length, self.SECTOR_SIZE - pos_offset)
            chunks.append(bytes(sector_data[pos_offset:pos_offset + tmp_len]))
            # Update position and remaining length
            self.pos += tmp_len
            length -= tmp_len
        return b"".join(chunks)
def standardize_serial(serial):
    """Normalise a raw serial into the dashed, upper-case redump form.

    Surrounding whitespace and dots are removed, underscores become dashes
    and the result is upper-cased. When the result contains no dash at all,
    one is inserted in front of the first digit (Mizzurna Falls (Japan) is
    the only known disc that needs this; redump keeps the dash, so it is
    added rather than stripped everywhere).
    """
    cleaned = serial.strip()
    cleaned = cleaned.replace(".", "")
    cleaned = cleaned.replace("_", "-")
    cleaned = cleaned.upper()
    if "-" in cleaned:
        return cleaned
    first_digit = next(
        (idx for idx, char in enumerate(cleaned) if char.isdigit()),
        None,
    )
    if first_digit is None:
        return cleaned
    return cleaned[:first_digit] + "-" + cleaned[first_digit:]
# Captures the trailing path component of a boot line, i.e. everything after
# the last ':' or '\' and before an optional ';1' version suffix.
_BOOT_FILE_RE = re.compile(r"([^:\\;]+)(?:;1)?$")


def parse_boot_type_and_serial(system_cnf):
    """Extract the console type and boot executable name from SYSTEM.CNF text.

    Args:
        system_cnf: decoded contents of the disc's SYSTEM.CNF file.

    Returns:
        A ``(system, name)`` tuple where *system* is ``"ps2"`` for a
        ``BOOT2`` line or ``"psx"`` for a ``BOOT`` line, and *name* is the
        executable file name without directory or ``;1`` suffix.

    Raises:
        Exception: if no usable ``BOOT``/``BOOT2`` line is found.
    """
    for raw_line in system_cnf.splitlines():
        # Strip both ends: trailing blanks would defeat the '$' anchor.
        line = raw_line.strip()
        # BOOT2 must be tested first because it also starts with BOOT.
        if line.startswith("BOOT2"):
            boot_type = "ps2"
        elif line.startswith("BOOT"):
            boot_type = "psx"
        else:
            # Other settings (VER, VMODE, TCB, ...) may precede the boot
            # line; skip them instead of failing on the first one.
            continue
        match = _BOOT_FILE_RE.search(line)
        if match:
            return (boot_type, match.group(1))
    raise Exception("failed parsing playstation boot file")
# Prefixes accepted as the start of a Sony serial.
_SONY_SERIAL_PREFIXES = (
    "SCES", "SCED",
    "SLES", "SLED",
    "SCPS", "SLPS",
    "SLPM", "SCUS",
    "SLUS", "PAPX",
    "LSP",
)


def sanity_check(serial):
    """Validate that *serial* looks like a Sony serial.

    Args:
        serial: standardized serial string, or ``None`` when none was found.

    Raises:
        Exception: if *serial* is ``None`` or does not start with one of the
            known Sony prefixes.
    """
    if serial is None or not serial.startswith(_SONY_SERIAL_PREFIXES):
        # str() keeps the message usable when serial is None.
        raise Exception("not a sony serial " + str(serial))
def _pvd_text(value):
    """Return a volume-descriptor field as str, decoding it when it is bytes."""
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("ascii", errors="replace")
    return value


def _parse_playstation(iso, volume_id):
    """Identify a PlayStation disc from its SYSTEM.CNF, or its volume label.

    Args:
        iso: an opened ``pycdlib.PyCdlib`` object.
        volume_id: the volume identifier as str (or ``None``).

    Returns:
        The result dictionary described in ``parse_data``.
    """
    try:
        # Read SYSTEM.CNF into an in-memory byte stream.
        system_cnf = io.BytesIO()
        iso.get_and_write_fp("/SYSTEM.CNF;1", system_cnf)
    except pycdlib.pycdlibexception.PyCdlibInvalidInput:
        # Can happen if there isn't a SYSTEM.CNF file, which is the case for
        # some early PS1 (only) games, King's Field (Japan) for instance.
        # In that case the serial is apparently in the volume identifier,
        # but it has to pass the sanity check first.
        try:
            serial = standardize_serial(volume_id)
        except AttributeError:
            serial = None
        sanity_check(serial)
        return {"serial": serial, "system": "psx", "id_md5": "unknown"}

    text = system_cnf.getvalue().decode('UTF-8', errors='replace')
    (ps_type, serial) = parse_boot_type_and_serial(text)
    serial = standardize_serial(serial)
    sanity_check(serial)
    # Urban Chaos has the wrong serial name in the executable (the one from
    # Threads of Fate); the volume label is used to tell the two apart.
    # NOTE(review): the comparison is kept exactly as originally written;
    # confirm against real dumps that it selects Urban Chaos.
    if serial == 'SLUS-01019' and volume_id is not None and serial == volume_id.strip():
        serial = 'SLUS-01091'  # replace by serial in redump
    return {"serial": serial, "system": ps_type, "id_md5": "unknown"}


def parse_data(mirage_wrapper, system):
    """Identify the disc behind *mirage_wrapper*.

    Args:
        mirage_wrapper: a ``MirageFileWrapper`` for the image to inspect.
        system: a forced system id (``psx``, ``ps2``, ``ss``, ``scd``,
            ``dc``) or ``None`` to auto-detect from the disc.

    Returns:
        A dict with keys ``serial``, ``system`` and ``id_md5``; values that
        cannot be determined are the string ``"unknown"``.

    Raises:
        Exception: if the system cannot be recognised or the serial found
            does not pass ``sanity_check``.
    """
    # The SEGA header appears at the first track, position 0, even if the
    # first track is not a data track (Dreamcast for example). The sector
    # size (2048 or 2352) doesn't matter since the header is only 256 bytes.
    # libmirage apparently adds some sectors before 0 (-150 is usually the
    # 'real start' and influences the sector count) but those have no file
    # data; sector 0 is used here as the raw start of the disc.
    sector = mirage_wrapper.disc.get_sector(0)
    (_status, data) = sector.get_data()

    # Try to extract from the sega header (dreamcast, saturn and megacd).
    if not system:
        # Sector data is bytes; tolerate non-ASCII garbage on non-Sega discs.
        sega_system = data[0:16].decode("ascii", errors="replace").replace("\x00", "").strip().upper()
    else:
        sega_system = system

    if sega_system in SAT_SYSTEM_IDS:
        return {"serial": "unknown", "system": "ss", "id_md5": hashlib.md5(data[:256]).hexdigest()}
    if sega_system in SCD_SYSTEM_IDS:
        # There are actually 2 headers in segacd (segacd and genesis),
        # 256 bytes each, but since redump only shows the genesis one,
        # that's what is hashed.
        return {"serial": "unknown", "system": "scd", "id_md5": hashlib.md5(data[256:512]).hexdigest()}
    if sega_system in DC_SYSTEM_IDS:
        serial = data[64:73].decode("utf-8").strip()
        return {"serial": serial, "system": "dc", "id_md5": hashlib.md5(data[:256]).hexdigest()}

    iso = pycdlib.PyCdlib()
    # Use the wrapper we were given, not whatever the caller named its own.
    iso.open_fp(mirage_wrapper)
    try:
        volume_id = _pvd_text(iso.pvd.volume_identifier)
        if not system:
            system = (_pvd_text(iso.pvd.system_identifier) or "").strip().upper()
            # This translation nukes the system identifier...
            # https://www.romhacking.net/translations/265/
            if not system and volume_id is not None and volume_id.strip() == "Cotton RIP Trans":
                system = "PLAYSTATION"
        if system in PS_SYSTEM_IDS:
            return _parse_playstation(iso, volume_id)
    finally:
        iso.close()
    raise Exception("unknown system identifier \'%s\', consider using the force option" % system)
def maybe_replace(cd_dump):
    """Resolve *cd_dump* to the cd-image that should actually be parsed.

    A regular image path is returned unchanged. For an ``.m3u`` playlist the
    first entry is returned as an absolute path; relative entries are
    resolved against the directory of the playlist. Blank lines and ``#``
    comment/directive lines in the playlist are skipped.

    Raises:
        Exception: if *cd_dump* is missing or unreadable, or if the playlist
            has no entry pointing at a readable file.
    """
    def readable(path):
        return os.path.isfile(path) and os.access(path, os.R_OK)

    if not readable(cd_dump):
        raise Exception("file doesn't exist or not readable")
    if not cd_dump.lower().endswith(".m3u"):
        return cd_dump

    newpath = ""
    with open(cd_dump) as playlist:
        for line in playlist:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if os.path.isabs(line):
                newpath = line
            else:
                # Make the entry absolute, relative to the dir of the m3u file.
                newpath = os.path.abspath(os.path.normpath(os.path.join(os.path.dirname(cd_dump), line)))
            # Only the first entry is used.
            break
    if readable(newpath):
        return newpath
    raise Exception("m3u parse error")
# Command line entry point: identify every path given and print one JSON list.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Parse game serial (psx, ps2, dc) or md5 hash of some invariant region (ss, scd, dc) to identify games regardless of cd-image format.',
        epilog='M3u parsing returns just the id of the first item on the m3u. For each valid file processed, this command returns a json list of maps to sys.out of the format: [ { file: parsed-file, system: psx|ps2|ss|scd|dc|unknown (redump system names), serial: string|unknown, id_md5: string|unknown } ]',
    )
    parser.add_argument(
        'paths',
        metavar='CD-IMAGE|M3U-FILE',
        type=str,
        nargs='+',
        help='cd-image to scan in all formats cdemu supports, or m3u containing at least one cd-image.',
    )
    parser.add_argument(
        '--force',
        type=str,
        nargs=1,
        choices=FORCE_SYSTEM_IDS,
        help='force a type of console while parsing the file(s). Useful for consoles or games that can\'t recognized correctly because of system identifier errors.',
    )
    args = parser.parse_args(sys.argv[1:])
    # Restore the default SIGINT handler so Ctrl+C terminates the program.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    # --force uses nargs=1, so when present it is a one-element list.
    forced_system = args.force[0] if args.force is not None else None
    try:
        Mirage.initialize()
        output = []
        for cd_dump in args.paths:
            try:
                replacement_dump = maybe_replace(cd_dump)
                wrapper = MirageFileWrapper([replacement_dump])
                parse_dict = parse_data(wrapper, forced_system)
                parse_dict["file"] = cd_dump
                output.append(parse_dict)
            except Exception as e:
                # A failing image is reported on stderr and still gets an
                # all-unknown entry so the output has one item per input.
                print("WARN: error '{}' from cd-image '{}'".format(str(e), cd_dump), file=sys.stderr)
                output.append({'file': cd_dump, 'system': 'unknown', 'serial': 'unknown', 'id_md5': 'unknown'})
        print(json.dumps(output, indent=4, sort_keys=True))
    finally:
        Mirage.shutdown()
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment