Skip to content

Instantly share code, notes, and snippets.

@pR0Ps
Last active January 8, 2023 16:43
Show Gist options
  • Save pR0Ps/1b368e7de466d709af008225c60604dc to your computer and use it in GitHub Desktop.
Save pR0Ps/1b368e7de466d709af008225c60604dc to your computer and use it in GitHub Desktop.
Show binary diff of files while handling inserted/removed data
#!/usr/bin/env python
"""
Shows a binary diff of files. Handles cases where data was inserted/removed
instead of just modified in place to avoid showing that the rest of a file
after a modification was changed.
WARNING: The algorithm used to generate the diff is quadratic in the expected
case and cubic in the worst case. Do not run this on large files unless you
want to wait for a *very* long time. Additionally, because it was only meant to
analyze small files, all data from the provided files will be read completely
into memory for analysis.
"""
import difflib
import itertools
import sys
# Number of byte cells shown on each output row.
WIDTH = 16
# Placeholders drawn for a cell that has no byte (a gap on the shorter side
# of a change): one character in the ASCII column, two in the hex column.
PADDING_ASCII = "-"
PADDING_BYTES = "--"
# ANSI escape sequences: BEFORE colors changed cells from the first input,
# AFTER colors changed cells from the second input, RESET ends coloring.
BEFORE = "\033[31m" # red
AFTER = "\033[32m" # green
RESET = "\033[0m" # reset
# Sanity checks: the placeholders must be exactly as wide as a real cell
# (two hex digits / one ASCII character) so that the columns stay aligned.
assert WIDTH > 0
assert len(PADDING_BYTES) == 2
assert len(PADDING_ASCII) == 1
class Char:
    """One display cell: a single byte value, or an empty padding cell.

    Attributes:
        char: integer byte value, or ``None`` for a padding cell.
        color_set: optional string emitted immediately before the cell.
        color_unset: optional string emitted immediately after the cell.
    """

    def __init__(self, char=None):
        self.char = char
        self.color_set = None
        self.color_unset = None

    def get_ascii(self):
        """Return the cell as it appears in the ASCII column.

        Printable ASCII (32..126) is shown as itself, any other byte as
        ``"."`` and a padding cell as ``PADDING_ASCII``. The result is
        wrapped in the cell's color markers when they are set.
        """
        value = self.char
        if value is None:
            glyph = PADDING_ASCII
        elif 32 <= value <= 126:
            glyph = chr(value)
        else:
            glyph = "."
        return (self.color_set or "") + glyph + (self.color_unset or "")

    def get_bytes(self):
        """Return the cell as it appears in the hex column.

        A byte is rendered as two uppercase hex digits and a padding cell
        as ``PADDING_BYTES``. The result is wrapped in the cell's color
        markers when they are set.
        """
        value = self.char
        if value is None:
            digits = PADDING_BYTES
        else:
            digits = format(value, "02X")
        return (self.color_set or "") + digits + (self.color_unset or "")

    # The repr is the hex rendering, color markers included.
    __repr__ = get_bytes
def print_output(line_num, data, start=None, end=None):
    """Print one output row for each of the two sides of the diff.

    Each printed line has the form ``offset|hex cells|ascii cells``. The hex
    column is always padded to the width of a full row so that the ASCII
    column lines up with the header and with every other row, even when the
    row holds fewer than ``WIDTH`` cells.

    Args:
        line_num: zero-based row index; the printed offset is
            ``line_num * WIDTH`` in uppercase hex.
        data: two-element sequence of cell lists (first side, second side).
            Each cell must provide ``get_bytes()`` and ``get_ascii()``.
        start: optional slice start applied to both cell lists.
        end: optional slice end applied to both cell lists.
    """
    # A full row is WIDTH two-digit cells joined by single spaces.
    hex_width = WIDTH * 3 - 1
    for side in (data[0], data[1]):
        cells = side[start:end]
        # Width is derived from the cell count, not the rendered string,
        # because cells may carry zero-width color escape sequences.
        visible = max(len(cells) * 3 - 1, 0)
        print(
            "{line:06X}|{data_bytes}{padding}|{data_ascii}".format(
                line=line_num * WIDTH,
                data_bytes=" ".join(c.get_bytes() for c in cells),
                padding=" " * (hex_width - visible),
                data_ascii="".join(c.get_ascii() for c in cells),
            )
        )
def do_diff(data1, data2, show_colors=True):
    """Print an aligned hex/ASCII diff of two byte sequences to stdout.

    The inputs are compared with ``difflib.SequenceMatcher``. For every
    opcode the shorter side is filled with padding cells so both sides
    cover the same number of cells, and the result is printed in rows of
    ``WIDTH`` cells via ``print_output`` (first side, then second side).

    Args:
        data1: first byte sequence (the "before" side).
        data2: second byte sequence (the "after" side).
        show_colors: when true, wrap each opcode's cells in ANSI color
            markers (``BEFORE``/``AFTER`` for changes, ``RESET`` for
            equal runs).
    """
    # Header row and ruler
    header_cells = [Char(col).get_bytes() for col in range(WIDTH)]
    print("offset|{headers}|ASCII".format(headers=" ".join(header_cells)))
    print("------+{}+{}".format("-" * (WIDTH * 3 - 1), "-" * WIDTH))

    matcher = difflib.SequenceMatcher(None, data1, data2, autojunk=False)
    carry = [[], []]  # cells of a not-yet-complete row, one list per side
    row = 0
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        before = [Char(byte) for byte in data1[i1:i2]]
        after = [Char(byte) for byte in data2[j1:j2]]

        # Fill the shorter side with padding cells so both have equal length
        shortfall = len(after) - len(before)
        if shortfall > 0:
            before += [Char() for _ in range(shortfall)]
        elif shortfall < 0:
            after += [Char() for _ in range(-shortfall)]
        sides = [before, after]

        carried = len(carry[0])
        span = len(before)

        if show_colors:
            palette = [RESET, RESET] if tag == "equal" else [BEFORE, AFTER]
            # Markers go at the start of the range and at every position
            # that will begin a new row once the carried cells are
            # prepended. The cell just before each such position (index -1
            # for position 0, i.e. the end of the range) closes the color.
            marks = [0]
            marks.extend(range(WIDTH - carried, span, WIDTH))
            for pos in marks:
                for cells, start_code in zip(sides, palette):
                    cells[pos].color_set = start_code
                    cells[pos - 1].color_unset = RESET

        # Put the carried-over cells in front of this opcode's cells
        sides = [old + new for old, new in zip(carry, sides)]

        # Emit every complete row
        full_rows = len(sides[0]) // WIDTH
        for index in range(full_rows):
            print_output(row, sides, index * WIDTH, (index + 1) * WIDTH)
            row += 1

        # Whatever did not fill a row is carried to the next opcode
        carry = [cells[full_rows * WIDTH:] for cells in sides]

    # Flush the final partial row, if any
    if carry[0]:
        print_output(row, carry)
def main():
    """Command-line entry point: diff the two files named on the command line.

    Both files are read fully in binary mode and passed to ``do_diff``.
    Coloring follows ``--color`` / ``--no-color``; when neither is given it
    is enabled only if stdout is a terminal.

    Returns:
        0, used as the process exit status.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Show a colored binary diff of 2 files"
    )
    cli.add_argument("file1")
    cli.add_argument("file2")
    cli.add_argument("--color", action=argparse.BooleanOptionalAction)
    opts = cli.parse_args()

    # No explicit choice: color only when writing to a terminal
    use_color = opts.color
    if use_color is None:
        use_color = sys.stdout.isatty()

    with open(opts.file1, "rb") as first, open(opts.file2, "rb") as second:
        do_diff(first.read(), second.read(), show_colors=use_color)
    return 0


if __name__ == "__main__":
    sys.exit(main())
@pR0Ps
Copy link
Author

pR0Ps commented Jan 8, 2023

Output example

output

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment