Skip to content

Instantly share code, notes, and snippets.

@dadatuputi
Last active October 21, 2020 23:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dadatuputi/e689f843df96c0ce97d620fa0c4a500c to your computer and use it in GitHub Desktop.
Save dadatuputi/e689f843df96c0ce97d620fa0c4a500c to your computer and use it in GitHub Desktop.
Talent Marketplace diff output
#!/usr/bin/env python3
import xlrd, argparse, openpyxl
import pandas as pd
SUFFIX_OLD = '_old'
SUFFIX_NEW = '_new'
def diff(f1, f2, o):
df1 = pd.read_excel(f1, sheet_name=0, header=0, index_col=0)
df2 = pd.read_excel(f2, sheet_name=0, header=0, index_col=0)
dif = df1.merge(df2, left_index=True, right_index=True, how='outer', indicator=True, suffixes=(SUFFIX_OLD, SUFFIX_NEW))
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.expand_frame_repr', False):
# Print deleted positions
dif_x = dif.loc[dif['_merge'] == 'left_only']
cols_x = [c for c in dif_x.columns if c.endswith(SUFFIX_OLD)]
dif_x = dif_x[cols_x]
print('Deleted Positions: {}'.format(len(dif_x)))
if not o and not dif_x.empty:
print('\n')
print(dif_x)
print('\n\n')
# Print added positions
dif_y = dif.loc[dif['_merge'] == 'right_only']
cols_y = [c for c in dif_y.columns if c.endswith(SUFFIX_NEW)]
dif_y = dif_y[cols_y]
print('New Positions: {}'.format(len(dif_y)))
if not o and not dif_y.empty:
print('\n')
print(dif_y)
print('\n\n')
# Print changed positions
diff_ids = []
dif_both = dif.loc[dif['_merge'] == 'both']
for index, row in dif_both.iterrows():
row_x = row[[c for c in cols_x if not c.startswith('My Favorites')]].to_frame().T.to_csv(header=None)
row_y = row[[c for c in cols_y if not c.startswith('My Favorites')]].to_frame().T.to_csv(header=None)
if row_x.lower() != row_y.lower():
diff_ids.append(index)
dif_both = dif_both.loc[diff_ids]
print('Changed Positions: {}'.format(len(diff_ids)))
if not o and not dif_both.empty:
print('\n')
print(dif_both)
print('\n\n')
# Write to file
if o:
with pd.ExcelWriter(o) as writer: # pylint: disable=abstract-class-instantiated
dif_x.to_excel(writer, sheet_name='deleted')
dif_y.to_excel(writer, sheet_name='new')
dif_both.to_excel(writer, sheet_name='changed')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Compare two outputs from Talent Marketplace")
parser.add_argument('baseline', type=argparse.FileType('rb'), help='path to a xlsx file taken earlier')
parser.add_argument('update', type=argparse.FileType('rb'), help='path to a xlsx file taken later')
parser.add_argument('-o', '--output', help='file to write output to', metavar="output.xlsx")
args = parser.parse_args()
diff(args.baseline, args.update, args.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment