Last active
January 12, 2021 07:51
-
-
Save ryanbehdad/02e69c65607c68b385a0b0b6b7a975cd to your computer and use it in GitHub Desktop.
Compare the columns of two dataframes (including their dtypes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compare_df_columns(df1, df2):
    """Compare the shape, column labels and column dtypes of two dataframes.

    Every mismatch found is reported with ``print``; the function does not
    stop at the first difference, so a single call lists all of them.

    Checks performed, in order:
      1. number of rows (``shape[0]``)
      2. number of columns (``shape[1]``)
      3. column labels present in only one of the two dataframes
      4. dtype of each column of ``df1`` against the same column in ``df2``

    Args:
        df1: First dataframe (anything exposing ``shape``, ``columns`` and
            ``dtypes`` the way a pandas DataFrame does).
        df2: Second dataframe, compared against ``df1``.

    Returns:
        True when no check reported a difference, False otherwise.

    Note:
        Dtypes are collected with ``dtypes.to_dict()``; a dict holds one
        entry per key, so duplicated column labels are compared only once.
    """
    matched = True

    # Compare number of rows
    rows1, rows2 = df1.shape[0], df2.shape[0]
    if rows1 != rows2:
        print(f'Row numbers do not match {rows1:,} vs {rows2:,}')
        matched = False

    # Compare number of columns
    ncols1, ncols2 = df1.shape[1], df2.shape[1]
    if ncols1 != ncols2:
        print(f'Column numbers do not match {ncols1:,} vs {ncols2:,}')
        matched = False

    # Compare column labels. The symmetric difference is sorted so the report
    # is stable between runs; key=str keeps mixed-type labels sortable.
    diff = sorted(set(df1.columns) ^ set(df2.columns), key=str)
    if diff:
        print('Columns appearing only in one of the dataframes:', diff)
        matched = False

    # Compare dtypes, driven by df1's columns. Columns that exist only in df2
    # have already been reported by the label check above.
    df1_cols = df1.dtypes.to_dict()
    df2_cols = df2.dtypes.to_dict()
    for column, dtype1 in df1_cols.items():
        if column not in df2_cols:
            print(f'{column} --> not found')
            matched = False
            continue
        dtype2 = df2_cols[column]
        if dtype1 != dtype2:
            print(f'Type of {column} is different: {dtype1} vs {dtype2}')
            matched = False

    return matched
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment