Created
July 15, 2014 20:31
-
-
Save mikofski/a39be947dd3bb7012470 to your computer and use it in GitHub Desktop.
diff of excel file in Git repository index and working copy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
diff of an excel file between the Git repository index and the working copy
""" | |
from dulwich.repo import Repo # import Repo object from dulwich | |
import sys | |
import os | |
from openpyxl import load_workbook | |
from StringIO import StringIO | |
def _find_sheet(workbook, name):
    """
    Return worksheet `name` from `workbook`, or None if it is missing.
    """
    try:
        # NOTE(review): depending on the openpyxl release an unknown sheet
        # name either raises KeyError or yields None -- confirm against the
        # installed version; both cases end up as None here
        return workbook.get_sheet_by_name(name)
    except KeyError:
        return None


def _format_row(label, number, cells):
    """
    Format one row as "<label> row: <number>" followed by tab separated values.
    """
    values = tuple(cell.value for cell in cells)
    fmt = ('%s row: %5d' % (label, number)) + '\t%s' * len(values)
    return fmt % values


def diff_excel(filename, sheets, repo='.'):
    """
    Print the rows of an excel file's working copy next to the rows of the
    version recorded in the Git repository index.

    For every requested sheet each pair of rows is printed as an "old row"
    line (index version) followed by a "new row" line (working copy), then
    the number of surplus rows in whichever version is longer. A sheet that
    is missing from either workbook is reported and skipped.

    :param filename: path of the excel file; used to load the working copy
        and as the key of the file in the repository index
    :param sheets: iterable of worksheet names to compare
    :param repo: path of the Git repository, defaults to current directory
    :raises IOError: if the working copy cannot be loaded
    """
    try:
        wb_new = load_workbook(filename)
    except Exception as err:
        # IOError is kept for callers, but only claim the file is missing
        # when it really is; otherwise report the underlying failure
        if not os.path.exists(filename):
            raise IOError('"%s" does not exist' % filename)
        raise IOError('could not load "%s": %s' % (filename, err))
    git_repo = Repo(repo)  # open repository, raises if not a repository
    index = git_repo.open_index()  # open index to get excel file's sha
    entry = index[filename]  # excel file info from index
    blob = git_repo[entry.sha]  # blob of excel file
    # file like object over the blob's raw data
    wb_old = load_workbook(StringIO(blob.data))
    for name in sheets:
        print('sheet: %s' % name)
        ws_old = _find_sheet(wb_old, name)
        ws_new = _find_sheet(wb_new, name)
        if ws_old is None:
            print('old workbook does not have sheet: "%s"' % name)
        if ws_new is None:
            print('new workbook does not have sheet: "%s"' % name)
        if ws_old is None or ws_new is None:
            # nothing to compare; never fall through with an unbound or
            # stale worksheet from a previous iteration
            continue
        # materialise once so len() works even if rows is a lazy iterable
        old_rows = list(ws_old.rows)
        new_rows = list(ws_new.rows)
        paired = zip(old_rows, new_rows)
        for number, (row_old, row_new) in enumerate(paired, 1):
            print(_format_row('old', number, row_old))
            print(_format_row('new', number, row_new))
        common = min(len(old_rows), len(new_rows))
        if common < len(old_rows):
            print('old file has %d extra rows' % (len(old_rows) - common))
        elif common < len(new_rows):
            print('new file has %d extra rows' % (len(new_rows) - common))
if __name__ == '__main__':
    # first argument is the excel file, remaining arguments are sheet names
    if len(sys.argv) < 2:
        # exit with a usage hint instead of an IndexError traceback
        sys.exit('usage: %s EXCEL_FILE [SHEET ...]' % sys.argv[0])
    diff_excel(filename=sys.argv[1], sheets=sys.argv[2:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment