Skip to content

Instantly share code, notes, and snippets.

@ssokolow
Created October 4, 2019 06:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ssokolow/4d7edbb15f8304b0ce3f830808de73e4 to your computer and use it in GitHub Desktop.
Save ssokolow/4d7edbb15f8304b0ce3f830808de73e4 to your computer and use it in GitHub Desktop.
Quick helper script to cross-compare large numbers of dumps of a ROM or disk to identify bits that vary between dumps
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Simple tool to identify bad bits in a cartridge by comparing many dumps
--snip--
Requires numpy.
"""
# Silence Pylint/Flake8 complaints if run under Python 2
from __future__ import (absolute_import, division, print_function,
with_statement, unicode_literals)
__author__ = "Stephan Sokolow (deitarion/SSokolow)"
__appname__ = "Mass Binary Diff"
__version__ = "0.0pre0"
__license__ = "MIT"
import logging, sys
from itertools import groupby
import numpy
log = logging.getLogger(__name__)
def compare(paths, skip_head=0):
    """Identify bits which vary across multiple copies of the same file.

    The first path is used as the reference. Every other file is XOR-ed
    against it, and the resulting per-byte difference masks are OR-ed
    together, so a bit is reported if it differed from the reference in at
    least one of the other files.

    Args:
        paths: Sequence of file paths. ``paths[0]`` is the reference copy and
            must exist; files whose size (after ``skip_head``) differs from
            the reference are skipped with a logged warning.
        skip_head: Number of leading bytes to drop from every file before
            comparing. Reported offsets are relative to the first byte kept.

    Returns:
        A ``(rows, stats)`` tuple. ``rows`` is a list of ``(offset, mask)``
        pairs of plain ``int``, sorted by offset, where ``mask`` has a 1 for
        every unstable bit of that byte. ``stats`` is a dict with
        ``'file_count'`` (number of files actually compared, reference
        included) and ``'file_len'`` (number of bytes compared per file).
    """
    reference = numpy.fromfile(paths[0], numpy.uint8)[skip_head:]

    # One accumulator byte per input byte: OR-ing every XOR result into it
    # merges the "which bits are bad" masks across all files without building
    # a Python-level collection of individual mismatches.
    unstable = numpy.zeros_like(reference)

    file_count = len(paths)
    for path in paths[1:]:
        candidate = numpy.fromfile(path, numpy.uint8)[skip_head:]
        if reference.size != candidate.size:
            # Logger.warn() was a deprecated alias and no longer exists in
            # Python 3.13+, so warning() must be used here.
            log.warning("WARNING: Size mismatch (%s != %s). "
                        "Skipping %r...",
                        reference.size, candidate.size, path)
            file_count -= 1
            continue

        unstable |= reference ^ candidate

    # nonzero() yields indices in ascending order, so the rows come out
    # already sorted by offset. tolist() converts to plain Python ints.
    offsets = numpy.nonzero(unstable)[0]
    rows = list(zip(offsets.tolist(), unstable[offsets].tolist()))

    return rows, {
        'file_count': file_count,
        'file_len': reference.size
    }
# TODO: This visualization helps to identify potential bad cells or noisy
# data lines, but I should also make it multi-column to show patterns
# in the addresses where problems are showing up.
def print_table(rows, stats):
    """Render the output of compare() for human inspection.

    Prints a two-column table (byte offset in hex, unstable-bit mask in
    binary) to standard output, followed by summary statistics.

    Args:
        rows: List of ``(offset, mask)`` pairs as returned by compare().
        stats: Dict with ``'file_len'`` (bytes examined per file) and
            ``'file_count'`` (number of copies correlated).
    """
    # Size the offset column for the largest possible offset, but never make
    # it narrower than its own heading; otherwise the heading would be wider
    # than the data rows and the column separators would not line up.
    col1_len = max(len("Offset"), len(hex(stats['file_len'])))

    # Internal consistency check: every reported offset must lie inside the
    # file. An empty table has nothing to check (and file_len may be 0).
    assert not rows or stats['file_len'] > max(x[0] for x in rows)

    bad_bit_count = sum(bin(x[1]).count('1') for x in rows)
    stats_lines = [
        " {}k bytes examined ".format(stats['file_len'] // 1024),
        " {} copies correlated ".format(stats['file_count']),
        " {} unstable bits found ".format(bad_bit_count)
    ]
    max_line_len = max(len(x) for x in stats_lines)

    # Pre-render a divider row that spans the table and is also long enough
    # to look good above the widest of the summary lines.
    divider = "-{}-+----------".format("-" * col1_len)
    if len(divider) < max_line_len:
        divider += '-' * (max_line_len - len(divider))

    print(" {:>{col1_len}} | Bad Bits ".format("Offset", col1_len=col1_len))
    print(divider)
    for line in rows:
        print(" {:#{col1_len}x} | {:08b} ".format(*line, col1_len=col1_len))
    print(divider)
    print('\n'.join(stats_lines))
def main():
    """Command-line entry point (also usable as a setuptools entry point).

    Parses the command line, configures logging to stderr according to the
    -v/-q flags, then compares the given files and prints the result table.
    Exits with status 1 if fewer than two input files were supplied.
    """
    import argparse

    # Only the part of the module docstring before the --snip-- marker is
    # shown as the --help description.
    summary = __doc__.replace('\r\n', '\n').split('\n--snip--\n')[0]

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=summary)
    cli.add_argument(
        '--version', action='version',
        version="%%(prog)s v%s" % __version__)
    cli.add_argument(
        '-v', '--verbose', action="count", default=2,
        help="Increase the verbosity. Use twice for extra effect.")
    cli.add_argument(
        '-q', '--quiet', action="count", default=0,
        help="Decrease the verbosity. Use twice for extra effect.")
    cli.add_argument(
        '--skip-head', action="store", type=int, default=0,
        help="Specify the number of bytes at the beginning of the "
             "file to exclude from comparison. (eg. specify `16` "
             "to omit the iNES header when calculating offsets in "
             "an NES ROM.)")
    cli.add_argument(
        'path', action='store', nargs='+', default=[],
        help="Path to a ROM to be diffed")
    opts = cli.parse_args()

    # Map (verbose - quiet) onto a log level, clamped to the ends of the
    # table. The default verbosity of 2 selects WARNING.
    levels = [logging.CRITICAL, logging.ERROR, logging.WARNING,
              logging.INFO, logging.DEBUG]
    requested = opts.verbose - opts.quiet
    level_index = max(min(requested, len(levels) - 1), 0)
    logging.basicConfig(level=levels[level_index],
                        format='%(levelname)s: %(message)s')

    # A single file gives nothing to compare against.
    if len(opts.path) < 2:
        log.critical("A minimum of two input files are required. Exiting.")
        sys.exit(1)

    rows, stats = compare(opts.path, skip_head=opts.skip_head)
    print_table(rows, stats)
if __name__ == '__main__':
main()
# vim: set sw=4 sts=4 expandtab :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment