Create a gist now

Instantly share code, notes, and snippets.

Minimal prototype pre-differ for MaGOG dumps
#!/usr/bin/env python
"""This script should work as a way to extract only changed lines from a
pair of MaGOG dumps in a format that can be imported into a spreadsheet.
"""
# Minimal argument-reading for proof of concept (I'd use argparse instead)
from sys import argv
import difflib, re
# Read the before and after files (paths are the first two CLI arguments).
# Each file is loaded fully into memory as a list of lines, newlines kept,
# which is the input shape difflib expects.
with open(argv[1]) as fobj_before:
    before = fobj_before.readlines()
with open(argv[2]) as fobj_after:
    after = fobj_after.readlines()
# Primitive "natural sort" for IDs so they sort properly rather than as
# (1, 10, 11, ..., 2, 20, 21, ...)
def id_sort(line):
    """Return an integer sort key taken from the first '|'-separated field.

    The text before the first '|' is stripped of surrounding whitespace and
    parsed as a base-10 integer. If it is not a valid integer (for example
    an "id" header row or an empty line), a very small sentinel value is
    returned so that such lines sort ahead of every real ID.
    """
    raw = line.split('|', 1)[0].strip()
    try:
        return int(raw, 10)
    except ValueError:
        # Sort "id" header at the top if present
        return -99999999
# Ensure diff doesn't get confused by different ordering: sort both dumps
# numerically by their leading ID field before comparing them.
before.sort(key=id_sort)
after.sort(key=id_sort)

# Find the lines which differ (n=0: no unchanged context lines in the output)
diff = difflib.unified_diff(before, after, n=0)
for line in diff:
    line = line.strip()  # Remove leading/trailing whitespace
    if not line or line[0:3] in ('---', '+++', '@@ '):
        continue  # Skip empty lines and headers

    # Convert the added/removed prefixes into a new column, so each output
    # row is "<+ or ->|<original pipe-separated line>"
    line = '%s|%s' % (line[0], line[1:])
    print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment