Forked from toddbirchard/pandas_dataframe_difference.py
Created
June 8, 2020 15:57
-
-
Save afr-dt/8d5e4656dd53abc15a6a221f1736c00e to your computer and use it in GitHub Desktop.
Helper function that compares two DataFrames and returns the rows that are unique to one of them or shared by both.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dataframe_difference(df1, df2, which=None, output_path='data/diff.csv'):
    """Find rows which differ between, or are shared by, two DataFrames.

    The frames are outer-merged with ``indicator=True``, which adds a
    ``_merge`` column whose values are ``'left_only'``, ``'right_only'`` or
    ``'both'``. That column is then used to select the rows of interest.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame. No join keys are passed to ``merge``, so pandas
            joins on the columns the two frames have in common.
        which: ``None`` to keep every row that is not present in both frames,
            or one of the ``_merge`` values (``'left_only'``,
            ``'right_only'``, ``'both'``) to keep only rows with that value.
        output_path: Where the result is written as CSV. Defaults to
            ``'data/diff.csv'``; pass ``None`` to skip writing.

    Returns:
        The selected rows, including the ``_merge`` indicator column.
    """
    # Imported locally so the function does not depend on a module-level
    # import that may not exist in the surrounding file.
    from pathlib import Path

    comparison_df = df1.merge(df2, indicator=True, how='outer')
    if which is None:
        diff_df = comparison_df[comparison_df['_merge'] != 'both']
    else:
        diff_df = comparison_df[comparison_df['_merge'] == which]

    if output_path is not None:
        target = Path(output_path)
        # to_csv raises OSError when the destination directory is missing,
        # so make sure it exists before writing.
        target.parent.mkdir(parents=True, exist_ok=True)
        diff_df.to_csv(target)
    return diff_df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment