Created
June 22, 2016 15:16
-
-
Save ssokolow/93e43b2fc532fab9cb166a4d01dce0b2 to your computer and use it in GitHub Desktop.
Minimal prototype pre-differ for MaGOG dumps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""This script should work as a way to extract only changed lines from a
pair of MaGOG dumps in a format that can be imported into a spreadsheet tool.
"""
# Minimal argument-reading for proof of concept (I'd use argparse instead) | |
from sys import argv | |
import difflib, re | |
# Read the before and after files.
# Fail with a usage hint rather than a bare IndexError traceback when the
# two dump paths were not supplied; extra arguments are still ignored.
if len(argv) < 3:
    raise SystemExit('Usage: %s BEFORE_DUMP AFTER_DUMP' % argv[0])

with open(argv[1]) as fobj_before:
    before = fobj_before.readlines()
with open(argv[2]) as fobj_after:
    after = fobj_after.readlines()
def id_sort(line):
    """Return a numeric sort key for a '|'-delimited dump line.

    This is a primitive "natural sort" for IDs so they order as
    (1, 2, ..., 10, 11, ...) rather than (1, 10, 11, ..., 2, 20, ...).

    The key is the text before the first '|' (surrounding whitespace
    ignored) parsed as a base-10 integer.  Lines whose first field is not
    an integer, such as an "id" header row or a blank line, get a very
    small sentinel key so they sort to the top.
    """
    raw = line.partition('|')[0].strip()
    try:
        return int(raw, 10)
    except ValueError:
        # Sort "id" header at the top if present
        return -99999999
# Ensure diff doesn't get confused by different ordering
before.sort(key=id_sort)
after.sort(key=id_sort)

# Find the lines which differ (n=0: emit changed lines only, no context)
diff = difflib.unified_diff(before, after, n=0)
for pos, line in enumerate(diff):
    # unified_diff always emits the '---' / '+++' file headers as its first
    # two lines.  Skip them by position rather than by prefix so that a
    # removed row starting with "--" (or an added row starting with "++")
    # is not mistaken for a header and silently dropped.
    if pos < 2:
        continue

    line = line.strip()  # Remove leading/trailing whitespace
    if not line or line.startswith('@@ '):
        continue  # Skip empty lines and hunk headers

    # Convert the added/removed prefixes into a new column
    print('%s|%s' % (line[0], line[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment