Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Asymmetric difference of two pandas Dataframes
def diff_df(df1, df2, how="left"):
"""
Find Difference of rows for given two dataframes
this function is not symmetric, means
diff(x, y) != diff(y, x)
however
diff(x, y, how='left') == diff(y, x, how='right')
Ref: https://stackoverflow.com/questions/18180763/set-difference-for-pandas/40209800#40209800
"""
if (df1.columns != df2.columns).any():
raise ValueError("Two dataframe columns must match")
if df1.equals(df2):
return None
elif how == 'right':
return pd.concat([df2, df1, df1]).drop_duplicates(keep=False)
elif how == 'left':
return pd.concat([df1, df2, df2]).drop_duplicates(keep=False)
else:
raise ValueError('how parameter supports only "left" or "right keywords"')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.