#!/usr/bin/env python
"""This script extracts only the changed lines from a pair of MaGOG dumps,
in a format that can be imported into a spreadsheet tool.
"""
# Minimal argument-reading for proof of concept (I'd use argparse instead) | |
from sys import argv | |
import difflib, re | |
# Read the before and after files; their paths are the first and second
# command-line arguments respectively.
with open(argv[1]) as fobj_before:
    before = fobj_before.readlines()
with open(argv[2]) as fobj_after:
    after = fobj_after.readlines()
# Primitive "natural sort" for IDs so they sort properly rather than as
# (1, 10, 11, ..., 2, 20, 21, ...)
def id_sort(line):
    """Return the numeric sort key for one '|'-delimited line.

    The key is the text before the first '|' (surrounding whitespace
    ignored) parsed as a base-10 integer.  If that text is not an integer,
    e.g. an "id" header row or an empty line, a large negative constant is
    returned so the line sorts ahead of ordinary IDs.
    """
    raw = line.split('|', 1)[0].strip()
    try:
        return int(raw, 10)
    except ValueError:
        # Sort "id" header at the top if present
        return -99999999
# Ensure diff doesn't get confused by different ordering.  Line endings are
# dropped first so that a final line lacking a trailing newline does not
# show up as a spurious change once the rows have been re-ordered.
before = sorted((row.rstrip('\n') for row in before), key=id_sort)
after = sorted((row.rstrip('\n') for row in after), key=id_sort)

# Find the lines which differ.  n=0 suppresses context lines, so apart from
# the headers every diff line starts with '+' or '-'.
diff = difflib.unified_diff(before, after, n=0, lineterm='')

# When there is any difference, the first two lines are the '---'/'+++' file
# headers.  Skip them by position rather than by prefix so that a removed
# row starting with '--' (or an added row starting with '++') is not
# mistaken for a header and silently dropped.
next(diff, None)
next(diff, None)

for line in diff:
    if line.startswith('@@'):
        continue  # Skip hunk headers; data lines only start with '+'/'-'
    line = line.strip()  # Remove leading/trailing whitespace
    if not line:
        continue  # Skip empty lines
    # Convert the added/removed prefixes into a new column
    print('%s|%s' % (line[0], line[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment