Last active
October 21, 2020 23:39
-
-
Save dadatuputi/e689f843df96c0ce97d620fa0c4a500c to your computer and use it in GitHub Desktop.
Talent Marketplace diff output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import xlrd, argparse, openpyxl | |
import pandas as pd | |
SUFFIX_OLD = '_old' | |
SUFFIX_NEW = '_new' | |
def diff(f1, f2, o): | |
df1 = pd.read_excel(f1, sheet_name=0, header=0, index_col=0) | |
df2 = pd.read_excel(f2, sheet_name=0, header=0, index_col=0) | |
dif = df1.merge(df2, left_index=True, right_index=True, how='outer', indicator=True, suffixes=(SUFFIX_OLD, SUFFIX_NEW)) | |
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.expand_frame_repr', False): | |
# Print deleted positions | |
dif_x = dif.loc[dif['_merge'] == 'left_only'] | |
cols_x = [c for c in dif_x.columns if c.endswith(SUFFIX_OLD)] | |
dif_x = dif_x[cols_x] | |
print('Deleted Positions: {}'.format(len(dif_x))) | |
if not o and not dif_x.empty: | |
print('\n') | |
print(dif_x) | |
print('\n\n') | |
# Print added positions | |
dif_y = dif.loc[dif['_merge'] == 'right_only'] | |
cols_y = [c for c in dif_y.columns if c.endswith(SUFFIX_NEW)] | |
dif_y = dif_y[cols_y] | |
print('New Positions: {}'.format(len(dif_y))) | |
if not o and not dif_y.empty: | |
print('\n') | |
print(dif_y) | |
print('\n\n') | |
# Print changed positions | |
diff_ids = [] | |
dif_both = dif.loc[dif['_merge'] == 'both'] | |
for index, row in dif_both.iterrows(): | |
row_x = row[[c for c in cols_x if not c.startswith('My Favorites')]].to_frame().T.to_csv(header=None) | |
row_y = row[[c for c in cols_y if not c.startswith('My Favorites')]].to_frame().T.to_csv(header=None) | |
if row_x.lower() != row_y.lower(): | |
diff_ids.append(index) | |
dif_both = dif_both.loc[diff_ids] | |
print('Changed Positions: {}'.format(len(diff_ids))) | |
if not o and not dif_both.empty: | |
print('\n') | |
print(dif_both) | |
print('\n\n') | |
# Write to file | |
if o: | |
with pd.ExcelWriter(o) as writer: # pylint: disable=abstract-class-instantiated | |
dif_x.to_excel(writer, sheet_name='deleted') | |
dif_y.to_excel(writer, sheet_name='new') | |
dif_both.to_excel(writer, sheet_name='changed') | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser(description="Compare two outputs from Talent Marketplace") | |
parser.add_argument('baseline', type=argparse.FileType('rb'), help='path to a xlsx file taken earlier') | |
parser.add_argument('update', type=argparse.FileType('rb'), help='path to a xlsx file taken later') | |
parser.add_argument('-o', '--output', help='file to write output to', metavar="output.xlsx") | |
args = parser.parse_args() | |
diff(args.baseline, args.update, args.output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment