Created
May 10, 2018 15:49
-
-
Save i30817/96674a3de8d9e4cb890e92cec3f36990 to your computer and use it in GitHub Desktop.
dump the serial/id of a rom.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import os.path | |
import hashlib | |
import signal | |
import sys | |
import re | |
import io | |
import traceback | |
import argparse | |
import json | |
try: | |
import gi | |
gi.require_version('Mirage', '3.1') | |
except: | |
print("Please install PyGObject (often called python-gobject) and libmirage from cdemu http://cdemu.sourceforge.net/ to use this program", file=sys.stderr) | |
sys.exit(1) | |
try: | |
import pycdlib | |
except Exception as e: | |
print("Please install pycdlib at least 1.4.0. You can install the most recent version with 'pip install --user https://github.com/clalancette/pycdlib/archive/master.zip', if 'pip install --user pycdlib' doesn't install 1.4.0", file=sys.stderr) | |
sys.exit(1) | |
from gi.repository import Mirage | |
# PlayStation: unfortunately ps1 and ps2 isos often carry the very same
# system description, so telling them apart requires further parsing.
# In each list the lower case entries are the 'force' ids.
PS_SYSTEM_IDS = [
    "PLAYSTATION",
    "PSX",
    "psx",
    "ps2",
]
SAT_SYSTEM_IDS = [
    "SEGA SEGASATURN",
    "ss",
]
SCD_SYSTEM_IDS = [
    "SEGADISCSYSTEM",
    "scd",
]
DC_SYSTEM_IDS = [
    "SEGA SEGAKATANA",
    "dc",
]
# Every id that can be forced, one per supported system.
FORCE_SYSTEM_IDS = [
    "psx",
    "ps2",
    "ss",
    "scd",
    "dc",
]
# This class pretends that MODE2/FORM2 tracks are MODE2/FORM1.
# While it would be possible to read the right bytes with memoization of
# offsets and some maths, pycdlib reacts very badly to mixed mode tracks.
# It's better to truncate the sectors and hope that the files needed are
# never on form2 sectors (not so unlikely): just because a cd image is
# Mixed mode (MODE2/2352) doesn't mean it has form2 sectors (they're
# basically only used for music and video files on the ps1).
class MirageFileWrapper(object):
    """Emulate read-only file access (seek/tell/read) on a disc image track.

    The image is loaded through libMirage and the first Mode 1,
    Mode 2 Form 1 or Mode 2 Mixed track is exposed as a flat byte stream
    made of 2048 bytes per sector, so that it can be handed to code that
    expects a file object.
    """

    # Number of bytes exposed per sector; anything past it is truncated.
    SECTOR_SIZE = 2048

    def __init__(self, image_files):
        """Load *image_files* and select the data track to read from.

        Args:
            image_files: list of image file names, passed unchanged to
                ``Mirage.Context.load_image``.

        Raises:
            Exception: when the image has no track of a supported type.
        """
        self.pos = 0
        self.length = 0

        # Open the image file(s) using libMirage
        self.mirage_context = Mirage.Context()
        self.disc = self.mirage_context.load_image(image_files)

        # Find the first Mode 1, Mode 2 Form 1, or Mode 2 Mixed track
        wanted_types = (
            Mirage.SectorType.MODE1,
            Mirage.SectorType.MODE2_FORM1,
            Mirage.SectorType.MODE2_MIXED,
        )
        self.track = None
        for t in range(self.disc.get_number_of_tracks()):
            track = self.disc.get_track_by_index(t)
            if track.get_sector_type() in wanted_types:
                self.track = track
                # Stop at the first hit: read() uses absolute addressing
                # and assumes the first data track.
                break

        # Validate
        if self.track is None:
            raise Exception("Failed to find Mode 1/Mode 2 Form 1/Mode 2 Mixed track!")

        end_sector = self.track.layout_get_start_sector() + self.track.layout_get_length()
        self.length = end_sector * self.SECTOR_SIZE

    def seek(self, pos, whence=os.SEEK_SET):
        """Move the read position and return the new absolute position.

        ``os.SEEK_SET``, ``os.SEEK_CUR`` and ``os.SEEK_END`` are honoured;
        any other *whence* leaves the position untouched.
        """
        if whence == os.SEEK_SET:
            self.pos = pos
        elif whence == os.SEEK_CUR:
            self.pos += pos
        elif whence == os.SEEK_END:
            # Relative to the end of the stream, not to the current position.
            self.pos = self.length + pos
        return self.pos

    def tell(self):
        """Return the current absolute read position in bytes."""
        return self.pos

    def read(self, length):
        """Read *length* bytes from the current position and advance it.

        Returns:
            bytes: the data read; empty when *length* is zero or negative.
        """
        chunks = []
        # A read request may span multiple sectors, so go sector by sector.
        while length > 0:
            # Integer division: the sector address has to be an int.
            pos_sector, pos_offset = divmod(self.pos, self.SECTOR_SIZE)

            # Get sector - for simplicity, we use absolute addressing,
            # with implicit assumption that we are using first track...
            # (for relative addressing, we might have to add the pregap)
            sector = self.track.get_sector(pos_sector, True)
            sector_data = sector.get_data()[1]

            # Select the part of data we're interested in,
            # truncate the rest if any (MODE2/FORM2 sectors)
            tmp_len = min(length, self.SECTOR_SIZE - pos_offset)
            chunks.append(bytes(sector_data[pos_offset:pos_offset + tmp_len]))

            # Update position and remaining length
            self.pos += tmp_len
            length -= tmp_len
        return b"".join(chunks)
def standardize_serial(serial):
    """Return *serial* in the dashed, upper-case form.

    Surrounding whitespace is stripped, every "." is removed, every "_"
    is turned into "-" and the result is upper-cased. If the result holds
    no "-" at all, one is inserted right before its first digit, when it
    has one.
    """
    cleaned = serial.strip().replace(".", "").replace("_", "-").upper()
    if "-" in cleaned:
        return cleaned

    # Mizzurna Falls (Japan) is the only one found that needs this.
    # We could just eliminate '-' like '.', but redump uses them.
    first_digit = next(
        (index for index, char in enumerate(cleaned) if char.isdigit()),
        None,
    )
    if first_digit is None:
        return cleaned
    return cleaned[:first_digit] + "-" + cleaned[first_digit:]
def parse_boot_type_and_serial(system_cnf):
    """Extract the console type and boot file name from SYSTEM.CNF text.

    The first line starting with "BOOT2" (ps2) or "BOOT" (psx) is used;
    other lines are skipped. The boot file name is the last component of
    that line once any device prefix, directory and trailing ";1" are
    dropped.

    Args:
        system_cnf: decoded text content of the SYSTEM.CNF file.

    Returns:
        tuple: ("ps2" | "psx", boot file name as written in the file).

    Raises:
        Exception: when no usable BOOT/BOOT2 line is found.
    """
    # Last run of characters that are not ':', '\' or ';', optionally
    # followed by the ";1" version suffix, at the end of the line.
    pattern = re.compile(r"([^:\\;]+)(?:;1)?$")
    for raw_line in system_cnf.splitlines():
        # Strip both sides: leading blanks would defeat startswith() and
        # trailing blanks would defeat the end-of-line anchor.
        line = raw_line.strip()
        if line.startswith("BOOT2"):
            boot_type = "ps2"
        elif line.startswith("BOOT"):
            boot_type = "psx"
        else:
            # Not a boot line; keep looking.
            continue
        match = pattern.search(line)
        if match:
            return (boot_type, match.group(1))
    raise Exception("failed parsing playstation boot file")
def sanity_check(serial):
    """Raise unless *serial* starts with one of the known Sony prefixes.

    Args:
        serial: serial string to validate, or None.

    Raises:
        Exception: "not a sony serial ..." when *serial* is None or does
            not start with a known prefix.
    """
    known_prefixes = (
        "SCES", "SCED",
        "SLES", "SLED",
        "SCPS", "SLPS",
        "SLPM", "SCUS",
        "SLUS", "PAPX",
        "LSP",
    )
    if serial is None or not serial.startswith(known_prefixes):
        # format() instead of '+' so that a None serial yields this
        # message rather than a TypeError.
        raise Exception("not a sony serial {}".format(serial))
def _pvd_text(field):
    """Return a volume descriptor field as stripped text (bytes or str in)."""
    # NOTE(review): pycdlib is assumed to hand these fields back as bytes
    # on Python 3; str input is passed through untouched.
    if isinstance(field, (bytes, bytearray)):
        field = bytes(field).decode("ascii", errors="replace")
    return field.strip()


def parse_data(mirage_wrapper, system):
    """Work out the system, serial and header hash of a loaded disc image.

    Sega systems are recognised from the header in raw sector 0; anything
    else is opened as an ISO9660 filesystem and checked for PlayStation
    data.

    Args:
        mirage_wrapper: MirageFileWrapper of the image. Its ``disc`` gives
            access to raw sector 0 and the wrapper itself is read by
            pycdlib as a file object.
        system: forced system id, or None/empty to detect it from the
            image.

    Returns:
        dict: keys "serial", "system" and "id_md5"; a value that is not
        available for the system is the string "unknown".

    Raises:
        Exception: when the system identifier is unknown or the
            PlayStation serial cannot be parsed or validated. Errors from
            libMirage and pycdlib are not caught here, apart from the
            missing SYSTEM.CNF case.
    """
    # SEGA header appears at the first track, position 0
    # Even if the first track is not a data track (dreamcast for ex)
    # Number of bytes on sectors (2048 or 2352) doesn't matter since the header is only 256 bytes
    # apparently libmirage does add some sectors before 0 (-150 is usually the 'real start' and influences sector count)
    # but those haven't got 'real' file data and 0 is always (???) the real start if you want the raw 0 sector.
    sector = mirage_wrapper.disc.get_sector(0)
    (_status, data) = sector.get_data()

    # try to extract from sega header (works for dreamcast, saturn and megacd)
    if not system:
        # Sector data is binary: decode it (non-Sega discs may hold
        # arbitrary bytes here, hence errors="replace") and drop NULs.
        header_id = bytes(data[0:16]).decode("ascii", errors="replace")
        sega_system = header_id.replace("\x00", "").strip().upper()
    else:
        sega_system = system

    if sega_system in SAT_SYSTEM_IDS:
        m = hashlib.md5()
        m.update(data[:256])
        serial = "unknown"
        return {"serial": serial, "system": "ss", "id_md5": m.hexdigest()}
    # There are actually 2 headers in segacd (segacd and genesis) 256 bytes each,
    # but since redump only shows the genesis, that's what is hashed.
    elif sega_system in SCD_SYSTEM_IDS:
        m = hashlib.md5()
        m.update(data[256:512])
        serial = "unknown"
        return {"serial": serial, "system": "scd", "id_md5": m.hexdigest()}
    elif sega_system in DC_SYSTEM_IDS:
        m = hashlib.md5()
        m.update(data[:256])
        serial = data[64:73].decode("utf-8").strip()
        return {"serial": serial, "system": "dc", "id_md5": m.hexdigest()}

    iso = pycdlib.PyCdlib()
    # Read through the wrapper that was passed in, not a module global.
    iso.open_fp(mirage_wrapper)

    if not system:
        system = _pvd_text(iso.pvd.system_identifier).upper()
        # this translation nukes the system identifier... https://www.romhacking.net/translations/265/
        if not system and _pvd_text(iso.pvd.volume_identifier) == "Cotton RIP Trans":
            system = "PLAYSTATION"

    if system in PS_SYSTEM_IDS:
        try:
            # Read SYSTEM.CNF to in-memory data stream (binary: the file
            # content is written as bytes and decoded afterwards)
            system_cnf = io.BytesIO()
            iso.get_and_write_fp("/SYSTEM.CNF;1", system_cnf)
            text_obj = system_cnf.getvalue().decode("UTF-8", errors="replace")
            (ps_type, serial) = parse_boot_type_and_serial(text_obj)
            serial = standardize_serial(serial)
            sanity_check(serial)
            # Urban Chaos has the wrong serial name in the executable (the one from threads of fate)
            # their label is different though
            # NOTE(review): the label test is kept exactly as written; confirm
            # against real dumps that it singles out Urban Chaos.
            if serial == 'SLUS-01019' and serial == _pvd_text(iso.pvd.volume_identifier):
                serial = 'SLUS-01091'  # replace by serial in redump
            return {"serial": serial, "system": ps_type, "id_md5": "unknown"}
        # can happen if there isn't a system.cnf file
        # (which happens in some early ps1 (only) games King's Field (Japan) for instance)
        # in that case, apparently the serial is in the cd volume identifier. But sanity check first.
        except pycdlib.pycdlibexception.PyCdlibInvalidInput:
            try:
                serial = standardize_serial(_pvd_text(iso.pvd.volume_identifier))
            except AttributeError:
                serial = None
            sanity_check(serial)
            return {"serial": serial, "system": "psx", "id_md5": "unknown"}
    raise Exception("unknown system identifier \'%s\', consider using the force option" % system)
def maybe_replace(cd_dump):
    """Return the cd-image path to load for *cd_dump*.

    A regular image path is returned unchanged. For an ".m3u" playlist
    the first entry is returned instead: blank lines and "#" comment
    lines are skipped, and a relative entry is made absolute relative to
    the directory of the playlist.

    Args:
        cd_dump: path of a cd-image or of an m3u playlist.

    Returns:
        str: path of the cd-image to open.

    Raises:
        Exception: when *cd_dump* is not a readable file, or when the
            playlist has no entry pointing to a readable file.
    """
    def readable(path):
        return os.path.isfile(path) and os.access(path, os.R_OK)

    if not readable(cd_dump):
        raise Exception("file doesn't exist or not readable")
    if not cd_dump.lower().endswith(".m3u"):
        return cd_dump

    newpath = ""
    with open(cd_dump) as playlist:
        for line in playlist:
            line = line.strip()
            if not line or line.startswith("#"):
                # Blank or comment/directive line (e.g. "#EXTM3U").
                continue
            if os.path.isabs(line):
                newpath = line
            else:
                # turns path of the dump absolute, relative to the dir of the m3u file
                playlist_dir = os.path.dirname(cd_dump)
                newpath = os.path.abspath(os.path.normpath(os.path.join(playlist_dir, line)))
            # Only the first entry of the playlist is used.
            break

    if readable(newpath):
        return newpath
    raise Exception("m3u parse error")
if __name__ == "__main__":
    # Command line: one or more images/playlists plus an optional forced system.
    parser = argparse.ArgumentParser(
        description='Parse game serial (psx, ps2, dc) or md5 hash of some invariant region (ss, scd, dc) to identify games regardless of cd-image format.',
        epilog='M3u parsing returns just the id of the first item on the m3u. For each valid file processed, this command returns a json list of maps to sys.out of the format: [ { file: parsed-file, system: psx|ps2|ss|scd|dc|unknown (redump system names), serial: string|unknown, id_md5: string|unknown } ]',
    )
    parser.add_argument(
        'paths',
        metavar='CD-IMAGE|M3U-FILE',
        type=str,
        nargs='+',
        help='cd-image to scan in all formats cdemu supports, or m3u containing at least one cd-image.',
    )
    parser.add_argument(
        '--force',
        type=str,
        nargs=1,
        choices=FORCE_SYSTEM_IDS,
        help='force a type of console while parsing the file(s). Useful for consoles or games that can\'t recognized correctly because of system identifier errors.',
    )
    args = parser.parse_args(sys.argv[1:])

    # Make Ctrl+C work
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    # --force takes exactly one value, so argparse stores a one-item list.
    forced_system = args.force[0] if args.force is not None else None

    try:
        Mirage.initialize()
        output = []
        for image_path in args.paths:
            try:
                wrapper = MirageFileWrapper([maybe_replace(image_path)])
                entry = parse_data(wrapper, forced_system)
                # Report the path as given by the user, even for playlists.
                entry["file"] = image_path
                output.append(entry)
            except Exception as e:
                # A failing image is reported and listed as unknown; the
                # remaining images are still processed.
                print("WARN: error '{}' from cd-image '{}'".format(str(e), image_path), file=sys.stderr)
                output.append({'file': image_path, 'system': 'unknown', 'serial': 'unknown', 'id_md5': 'unknown'})
        print(json.dumps(output, indent=4, sort_keys=True))
    finally:
        Mirage.shutdown()
    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment