Created
October 4, 2019 06:44
-
-
Save ssokolow/4d7edbb15f8304b0ce3f830808de73e4 to your computer and use it in GitHub Desktop.
Quick helper script to cross-compare large numbers of dumps of a ROM or disk to identify bits that vary between dumps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
"""Simple tool to identify bad bits in a cartridge by comparing many dumps | |
--snip-- | |
Requires numpy. | |
""" | |
# Silence Pylint/Flake8 complaints if run under Python 2 | |
from __future__ import (absolute_import, division, print_function, | |
with_statement, unicode_literals) | |
__author__ = "Stephan Sokolow (deitarion/SSokolow)" | |
__appname__ = "Mass Binary Diff" | |
__version__ = "0.0pre0" | |
__license__ = "MIT" | |
import logging, sys | |
from itertools import groupby | |
import numpy | |
log = logging.getLogger(__name__) | |
def compare(paths, skip_head=0):
    """Identify bits which vary across multiple copies of the same file.

    The first path is treated as the reference dump. Every other dump whose
    length (after dropping ``skip_head`` bytes) matches the reference is
    XORed against it, and the per-byte XOR results are ORed together. A set
    bit in a returned mask therefore means "this bit differed from the
    reference in at least one dump". Dumps with a different length are
    skipped with a warning and are not counted.

    Args:
        paths: Sequence of paths to the dumps; ``paths[0]`` is the reference.
        skip_head: Number of leading bytes to drop from every file before
            comparing. Reported offsets are relative to the trimmed data.

    Returns:
        A ``(rows, stats)`` tuple. ``rows`` is a list of ``(offset, mask)``
        tuples of plain ``int``, sorted by offset, with one entry per byte
        that varied. ``stats`` is a dict holding ``'file_count'`` (number of
        files actually correlated, reference included) and ``'file_len'``
        (length in bytes of the trimmed reference).
    """
    rom1 = numpy.fromfile(paths[0], numpy.uint8)[skip_head:]

    # One mask byte per ROM byte. ORing each XOR result into it merges the
    # "which bits are bad" information from every dump without building a
    # Python-level collection of individual mismatches.
    unstable = numpy.zeros(rom1.shape, dtype=numpy.uint8)

    file_count = len(paths)
    for path in paths[1:]:
        rom2 = numpy.fromfile(path, numpy.uint8)[skip_head:]
        if rom1.size != rom2.size:
            log.warning("WARNING: Size mismatch (%s != %s). "
                        "Skipping %r...", rom1.size, rom2.size, path)
            file_count -= 1
            continue

        unstable |= rom1 ^ rom2

    # numpy.nonzero yields indices in ascending order, so the rows come out
    # already sorted by offset. tolist() converts to plain Python ints.
    offsets = numpy.nonzero(unstable)[0]
    rows = list(zip(offsets.tolist(), unstable[offsets].tolist()))

    return rows, {
        'file_count': file_count,
        'file_len': rom1.size
    }
# TODO: This visualization helps to identify potential bad cells or noisy
# data lines, but I should also make it multi-column to show patterns
# in the addresses where problems are showing up.
def print_table(rows, stats):
    """Render the output of compare() for human inspection.

    Prints a two-column table (hexadecimal offset, 8-bit binary mask of the
    unstable bits) to standard output, followed by summary statistics.

    Args:
        rows: List of ``(offset, mask)`` tuples as returned by ``compare()``.
        stats: Dict with ``'file_len'`` (bytes examined) and ``'file_count'``
            (number of copies correlated).
    """
    # The offset column must be wide enough for the largest possible offset
    # AND for its own header; otherwise the "|" separators stop lining up
    # for inputs shorter than 0x1000 bytes.
    col1_len = max(len(hex(stats['file_len'])), len("Offset"))

    # Sanity check: every reported offset must lie inside the file. Skipped
    # when there are no rows so that empty inputs do not trip it.
    assert not rows or stats['file_len'] > max(x[0] for x in rows)

    bad_bit_count = sum(bin(x[1]).count('1') for x in rows)
    stats_lines = [
        " {}k bytes examined ".format(stats['file_len'] // 1024),
        " {} copies correlated ".format(stats['file_count']),
        " {} unstable bits found ".format(bad_bit_count)
    ]
    max_line_len = max(len(x) for x in stats_lines)

    # Pre-render a divider row matching the table width, extended if needed
    # so it also spans the longest statistics line printed below it.
    divider = "-{}-+----------".format("-" * col1_len)
    if len(divider) < max_line_len:
        divider += '-' * (max_line_len - len(divider))

    print(" {:>{col1_len}} | Bad Bits ".format("Offset", col1_len=col1_len))
    print(divider)
    for offset, mask in rows:
        print(" {:#{col1_len}x} | {:08b} ".format(
            offset, mask, col1_len=col1_len))
    print(divider)
    print('\n'.join(stats_lines))
def main():
    """Command-line entry point; also usable as a setuptools entry point.

    Parses the command line, configures logging to stderr, requires at
    least two input paths, then runs compare() and prints the result with
    print_table().
    """
    import argparse

    # Only the part of the module docstring before the "--snip--" marker is
    # shown in --help output.
    summary = __doc__.replace('\r\n', '\n').split('\n--snip--\n')[0]

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=summary)
    cli.add_argument(
        '--version', action='version',
        version="%%(prog)s v%s" % __version__)
    cli.add_argument(
        '-v', '--verbose', action="count", default=2,
        help="Increase the verbosity. Use twice for extra effect.")
    cli.add_argument(
        '-q', '--quiet', action="count", default=0,
        help="Decrease the verbosity. Use twice for extra effect.")
    cli.add_argument(
        '--skip-head', action="store", type=int, default=0,
        help="Specify the number of bytes at the beginning of the "
             "file to exclude from comparison. (eg. specify `16` "
             "to omit the iNES header when calculating offsets in "
             "an NES ROM.)")
    cli.add_argument(
        'path', action='store', nargs='+', default=[],
        help="Path to a ROM to be diffed")
    opts = cli.parse_args()

    # Map (verbose - quiet) onto a logging level, clamped to the valid range
    # of indices into the table below.
    levels = [logging.CRITICAL, logging.ERROR, logging.WARNING,
              logging.INFO, logging.DEBUG]
    level_index = opts.verbose - opts.quiet
    if level_index > len(levels) - 1:
        level_index = len(levels) - 1
    if level_index < 0:
        level_index = 0
    logging.basicConfig(level=levels[level_index],
                        format='%(levelname)s: %(message)s')

    # A comparison needs a reference plus at least one other copy.
    if len(opts.path) < 2:
        log.critical("A minimum of two input files are required. Exiting.")
        sys.exit(1)

    rows, stats = compare(opts.path, skip_head=opts.skip_head)
    print_table(rows, stats)
# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == '__main__':
    main()
# vim: set sw=4 sts=4 expandtab : |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment