Skip to content

Instantly share code, notes, and snippets.

@ryanbehdad
Last active January 12, 2021 07:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanbehdad/02e69c65607c68b385a0b0b6b7a975cd to your computer and use it in GitHub Desktop.
Save ryanbehdad/02e69c65607c68b385a0b0b6b7a975cd to your computer and use it in GitHub Desktop.
Compare the columns of two dataframes (including their dtypes)
def compare_df_columns(df1, df2) -> bool:
    """Compare the shape, column names and column dtypes of two dataframes.

    Every mismatch that is found is printed to stdout. The checks do not
    stop at the first failure, so a single call reports all differences.

    Args:
        df1: First dataframe (must expose ``shape``, ``columns`` and
            ``dtypes``).
        df2: Second dataframe, compared against ``df1``.

    Returns:
        True when the row count, column count, column names and the dtype
        of every column of ``df1`` agree with ``df2``; False otherwise.
    """
    matched = True

    # Compare number of rows
    if df1.shape[0] != df2.shape[0]:
        print(f'Row numbers do not match {df1.shape[0]:,} vs {df2.shape[0]:,}')
        matched = False

    # Compare number of columns
    if df1.shape[1] != df2.shape[1]:
        print(f'Column numbers do not match {df1.shape[1]:,} vs {df2.shape[1]:,}')
        matched = False

    # Compare column names. The sets are used for membership tests only; the
    # reported list is built in dataframe order (df1-only columns first, then
    # df2-only) so the printed output is stable from run to run instead of
    # depending on set iteration order.
    names1 = set(df1.columns)
    names2 = set(df2.columns)
    diff = [col for col in dict.fromkeys(df1.columns) if col not in names2]
    diff += [col for col in dict.fromkeys(df2.columns) if col not in names1]
    if diff:
        print('Columns appearing only in one of the dataframes:', diff)
        matched = False

    # Compare dtypes of every df1 column against the same-named df2 column
    df1_cols = df1.dtypes.to_dict()
    df2_cols = df2.dtypes.to_dict()
    for key, dtype1 in df1_cols.items():
        if key not in df2_cols:
            print(f'{key} --> not found')
            matched = False
            continue
        dtype2 = df2_cols[key]
        if dtype1 != dtype2:
            print(f'Type of {key} is different: {dtype1} vs {dtype2}')
            matched = False

    return matched
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment