ssokolow/magog_updates_filter.py

## magog_updates_filter.py
#!/usr/bin/env python
"""This script should work as a way to extract only changed lines from a
pair of MaGOG dumps in a format that can be imported into a spreadsheet
tool.
"""

# Minimal argument-reading for proof of concept (I'd use argparse instead)
from sys import argv
import difflib, re

# Load both dumps fully into memory as lists of lines (line endings kept)
with open(argv[1]) as old_dump:
    before = list(old_dump)
with open(argv[2]) as new_dump:
    after = list(new_dump)

# Primitive "natural sort" for IDs so they sort properly rather than as
# (1, 10, 11, ..., 2, 20, 21, ...)
def id_sort(line):
    """Return the integer ID held in the first '|'-separated field of *line*.

    Intended as a ``key=`` function so records order numerically.  A line
    whose first field is not a base-10 integer (for example an "id" header
    row) gets a large negative key, which places it ahead of numbered rows.
    """
    id_field = line.partition('|')[0].strip()
    try:
        key = int(id_field, 10)
    except ValueError:
        # Not a number: treat as a header and push it to the top
        key = -99999999
    return key

# Ensure diff doesn't get confused by different ordering
before.sort(key=id_sort)
after.sort(key=id_sort)

# Find the lines which differ.  Line terminators are dropped before comparing
# so that a final record lacking a trailing newline (which sorting may have
# moved away from the end of the list) is not reported as a change.
diff = difflib.unified_diff(
    [record.rstrip('\n') for record in before],
    [record.rstrip('\n') for record in after],
    n=0, lineterm='')

# When there is any difference, unified_diff yields the '---' and '+++' file
# headers first.  Discard them by position rather than by prefix so a record
# whose own text begins with '--' or '++' is never mistaken for a header.
next(diff, None)
next(diff, None)

for line in diff:
    # Every remaining line is either an '@@' hunk header or a record prefixed
    # with '-' (only in the first file) or '+' (only in the second file).
    if line.startswith('@@'):
        continue  # Skip hunk headers

    line = line.rstrip()  # Remove trailing whitespace

    # Convert the added/removed prefix into a new column
    print('%s|%s' % (line[0], line[1:]))
	#!/usr/bin/env python
	"""This script should work as a way to extract only changed lines from a
	pair of MaGOG dumps in a format that can be imported into a spreadsheet
	tool.
	"""

	# Minimal argument-reading for proof of concept (I'd use argparse instead)
	from sys import argv
	import difflib, re

	# Read the before and after files (each as a list of lines, newlines kept)
	with open(argv[1]) as fobj_before:
	    before = fobj_before.readlines()
	with open(argv[2]) as fobj_after:
	    after = fobj_after.readlines()

	# Primitive "natural sort" for IDs so they sort properly rather than as
	# (1, 10, 11, ..., 2, 20, 21, ...)
	def id_sort(line):
	    """Return the integer ID from the first '|'-separated field of *line*.

	    Used as a sort key so that IDs order numerically.  If the first field
	    is not a base-10 integer (e.g. an "id" header row), a large negative
	    value is returned so the line sorts to the top.
	    """
	    # Split on a plain '|'; a backslash-escaped pipe would never match the
	    # data and would make every line fall through to the header key.
	    raw = line.split('|', 1)[0].strip()
	    try:
	        return int(raw, 10)
	    except ValueError:
	        # Sort "id" header at the top if present
	        return -99999999

	# Ensure diff doesn't get confused by different ordering
	before.sort(key=id_sort)
	after.sort(key=id_sort)

	# Find the lines which differ.  Line terminators are dropped before
	# comparing so that a final record lacking a trailing newline (which
	# sorting may have moved away from the end) is not reported as a change.
	diff = difflib.unified_diff(
	    [record.rstrip('\n') for record in before],
	    [record.rstrip('\n') for record in after],
	    n=0, lineterm='')

	# When there is any difference, unified_diff yields the '---' and '+++'
	# file headers first.  Discard them by position rather than by prefix so
	# a record whose own text begins with '--' or '++' is never dropped.
	next(diff, None)
	next(diff, None)

	for line in diff:
	    # Every remaining line is either an '@@' hunk header or a record
	    # prefixed with '-' (first file only) or '+' (second file only).
	    if line.startswith('@@'):
	        continue  # Skip hunk headers

	    line = line.rstrip()  # Remove trailing whitespace

	    # Convert the added/removed prefix into a new column, separated by a
	    # plain '|' so the output matches the column separator of the input
	    print('%s|%s' % (line[0], line[1:]))