Skip to content

Instantly share code, notes, and snippets.

@jilleb
Created July 24, 2023 14:59
Show Gist options
  • Save jilleb/1b436dc5c62885f794ee40d44f0ff03d to your computer and use it in GitHub Desktop.
Save jilleb/1b436dc5c62885f794ee40d44f0ff03d to your computer and use it in GitHub Desktop.
Compare a column of 1 excel file with another column in an excel file and write the contents to a .csv
import pandas as pd
import argparse
def compare_excels(excel_file1, column_index1, excel_file2, column_index2, output_file):
try:
# Read the Excel files into pandas DataFrames
df1 = pd.read_excel(excel_file1)
df2 = pd.read_excel(excel_file2)
# Extract the values from the specified columns in Excel 1 and Excel 2
values_range1 = set(df1.iloc[:, column_index1])
values_range2 = set(df2.iloc[:, column_index2])
# Find the values present in range1 but missing in range2
missing_values = values_range1.difference(values_range2)
# Accumulate the missing rows in a single DataFrame
missing_rows_df = pd.DataFrame(columns=df1.columns)
if missing_values:
print("Values missing in Excel 2:")
for value in missing_values:
print(value)
missing_rows = df1[df1.iloc[:, column_index1] == value]
missing_rows_df = pd.concat([missing_rows_df, missing_rows])
# Save the missing rows DataFrame to a CSV file
if not missing_rows_df.empty:
missing_rows_df.to_csv(output_file, index=False)
print("Missing rows have been saved to", output_file)
else:
print("No missing values found.")
except Exception as e:
print("Error:", str(e))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Compare two Excel files and find missing values.")
parser.add_argument("file1", help="Path to the first Excel file.")
parser.add_argument("column_index1", type=int, help="Column index for File 1 (0-based index).")
parser.add_argument("file2", help="Path to the second Excel file.")
parser.add_argument("column_index2", type=int, help="Column index for File 2 (0-based index).")
parser.add_argument("output_file", help="Output CSV file name.")
args = parser.parse_args()
compare_excels(args.file1, args.column_index1, args.file2, args.column_index2, args.output_file)
@jilleb
Copy link
Author

jilleb commented Jul 24, 2023

usage:

python excel_compare_output_csv.py file1.xlsx 0 file2.xlsx 4 temp.csv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment