Created
July 24, 2023 14:59
-
-
Save jilleb/1b436dc5c62885f794ee40d44f0ff03d to your computer and use it in GitHub Desktop.
Compare a column of 1 excel file with another column in an excel file and write the contents to a .csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import argparse | |
def compare_excels(excel_file1, column_index1, excel_file2, column_index2, output_file): | |
try: | |
# Read the Excel files into pandas DataFrames | |
df1 = pd.read_excel(excel_file1) | |
df2 = pd.read_excel(excel_file2) | |
# Extract the values from the specified columns in Excel 1 and Excel 2 | |
values_range1 = set(df1.iloc[:, column_index1]) | |
values_range2 = set(df2.iloc[:, column_index2]) | |
# Find the values present in range1 but missing in range2 | |
missing_values = values_range1.difference(values_range2) | |
# Accumulate the missing rows in a single DataFrame | |
missing_rows_df = pd.DataFrame(columns=df1.columns) | |
if missing_values: | |
print("Values missing in Excel 2:") | |
for value in missing_values: | |
print(value) | |
missing_rows = df1[df1.iloc[:, column_index1] == value] | |
missing_rows_df = pd.concat([missing_rows_df, missing_rows]) | |
# Save the missing rows DataFrame to a CSV file | |
if not missing_rows_df.empty: | |
missing_rows_df.to_csv(output_file, index=False) | |
print("Missing rows have been saved to", output_file) | |
else: | |
print("No missing values found.") | |
except Exception as e: | |
print("Error:", str(e)) | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser(description="Compare two Excel files and find missing values.") | |
parser.add_argument("file1", help="Path to the first Excel file.") | |
parser.add_argument("column_index1", type=int, help="Column index for File 1 (0-based index).") | |
parser.add_argument("file2", help="Path to the second Excel file.") | |
parser.add_argument("column_index2", type=int, help="Column index for File 2 (0-based index).") | |
parser.add_argument("output_file", help="Output CSV file name.") | |
args = parser.parse_args() | |
compare_excels(args.file1, args.column_index1, args.file2, args.column_index2, args.output_file) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage:
python excel_compare_output_csv.py file1.xlsx 0 file2.xlsx 4 temp.csv